devlyn-cli 2.2.2 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/CLAUDE.md +4 -4
- package/README.md +85 -34
- package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +61 -44
- package/benchmark/auto-resolve/BENCHMARK-RESULTS.md +341 -0
- package/benchmark/auto-resolve/README.md +307 -44
- package/benchmark/auto-resolve/RUBRIC.md +23 -14
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +7 -3
- package/benchmark/auto-resolve/fixtures/F10-persist-write-collision/NOTES.md +8 -3
- package/benchmark/auto-resolve/fixtures/F11-batch-import-all-or-nothing/NOTES.md +8 -3
- package/benchmark/auto-resolve/fixtures/F12-webhook-raw-body-signature/NOTES.md +10 -4
- package/benchmark/auto-resolve/fixtures/F15-frozen-diff-race-review/NOTES.md +10 -4
- package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/NOTES.md +12 -0
- package/benchmark/auto-resolve/fixtures/F16-cli-quote-tax-rules/spec.md +6 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +7 -4
- package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/NOTES.md +12 -0
- package/benchmark/auto-resolve/fixtures/F21-cli-scheduler-priority/spec.md +6 -0
- package/benchmark/auto-resolve/fixtures/F22-cli-ledger-close/NOTES.md +8 -0
- package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/NOTES.md +12 -0
- package/benchmark/auto-resolve/fixtures/F23-cli-fulfillment-wave/spec.md +6 -0
- package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/NOTES.md +16 -4
- package/benchmark/auto-resolve/fixtures/F25-cli-cart-promotion-rules/spec.md +7 -0
- package/benchmark/auto-resolve/fixtures/F26-cli-payout-ledger-rules/NOTES.md +11 -5
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +8 -1
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +4 -2
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +1 -1
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/NOTES.md +34 -0
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/expected.json +57 -0
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/setup.sh +2 -0
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/spec.md +67 -0
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/verifiers/duplicate-event-error.js +35 -0
- package/benchmark/auto-resolve/fixtures/F31-cli-seat-rebalance/verifiers/priority-transfer-rollback.js +53 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/NOTES.md +38 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/expected.json +57 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/setup.sh +2 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/spec.md +70 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/task.txt +3 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/verifiers/duplicate-renewal-error.js +42 -0
- package/benchmark/auto-resolve/fixtures/F32-cli-subscription-renewal/verifiers/priority-credit-rollback.js +70 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +10 -3
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +7 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +5 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +7 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +3 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +1 -1
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +15 -3
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +1 -1
- package/benchmark/auto-resolve/fixtures/SCHEMA.md +53 -7
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/NOTES.md +37 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/RETIRED.md +13 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/expected.json +56 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/setup.sh +18 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/spec.md +69 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/verifiers/exact-proration.js +48 -0
- package/benchmark/auto-resolve/fixtures/retired/F27-cli-subscription-proration/verifiers/rules-source-and-conflict.js +79 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/NOTES.md +54 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/RETIRED.md +7 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/expected.json +67 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/setup.sh +2 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/spec.md +67 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/task.txt +5 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/policy-precedence.js +72 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/validation-and-immutability.js +43 -0
- package/benchmark/auto-resolve/fixtures/retired/F28-cli-return-authorization/verifiers/validation-boundary.js +116 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/NOTES.md +35 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/RETIRED.md +12 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/expected.json +58 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/setup.sh +2 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/spec.md +73 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/task.txt +17 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/verifiers/mixed-idempotent-settlement.js +53 -0
- package/benchmark/auto-resolve/fixtures/retired/F30-cli-credit-hold-settlement/verifiers/rejection-boundaries.js +74 -0
- package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/NOTES.md +60 -0
- package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/RETIRED.md +29 -0
- package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/expected.json +73 -0
- package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/setup.sh +28 -0
- package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/spec.md +58 -0
- package/benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/task.txt +5 -0
- package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/full-pipeline-pair-gate.json +82 -0
- package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/full-pipeline-pair-gate.md +18 -0
- package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/headroom-gate.json +46 -0
- package/benchmark/auto-resolve/results/20260510-f16-f23-f25-combined-proof/headroom-gate.md +17 -0
- package/benchmark/auto-resolve/run-real-benchmark.md +303 -0
- package/benchmark/auto-resolve/scripts/audit-headroom-rejections.py +441 -0
- package/benchmark/auto-resolve/scripts/audit-pair-evidence.py +1256 -0
- package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +147 -15
- package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +28 -16
- package/benchmark/auto-resolve/scripts/collect-swebench-predictions.py +11 -1
- package/benchmark/auto-resolve/scripts/compile-report.py +208 -46
- package/benchmark/auto-resolve/scripts/fetch-swebench-instances.py +22 -4
- package/benchmark/auto-resolve/scripts/frozen-verify-gate.py +175 -30
- package/benchmark/auto-resolve/scripts/full-pipeline-pair-gate.py +408 -46
- package/benchmark/auto-resolve/scripts/headroom-gate.py +270 -39
- package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +164 -33
- package/benchmark/auto-resolve/scripts/iter-0033c-l1-summary.py +97 -0
- package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +150 -38
- package/benchmark/auto-resolve/scripts/judge.sh +153 -26
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +12 -5
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +25 -2
- package/benchmark/auto-resolve/scripts/pair-candidate-frontier.py +469 -0
- package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +5 -5
- package/benchmark/auto-resolve/scripts/pair-plan-lint.py +9 -2
- package/benchmark/auto-resolve/scripts/pair-rejected-fixtures.sh +91 -0
- package/benchmark/auto-resolve/scripts/pair_evidence_contract.py +269 -0
- package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-case.py +39 -10
- package/benchmark/auto-resolve/scripts/prepare-swebench-frozen-corpus.py +34 -4
- package/benchmark/auto-resolve/scripts/prepare-swebench-solver-worktree.py +23 -5
- package/benchmark/auto-resolve/scripts/recent-benchmark-summary.py +232 -0
- package/benchmark/auto-resolve/scripts/run-fixture.sh +118 -51
- package/benchmark/auto-resolve/scripts/run-frozen-verify-pair.sh +211 -39
- package/benchmark/auto-resolve/scripts/run-full-pipeline-pair-candidate.sh +335 -39
- package/benchmark/auto-resolve/scripts/run-headroom-candidate.sh +249 -6
- package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +22 -48
- package/benchmark/auto-resolve/scripts/run-suite.sh +44 -7
- package/benchmark/auto-resolve/scripts/run-swebench-frozen-corpus.sh +120 -19
- package/benchmark/auto-resolve/scripts/run-swebench-solver-batch.sh +32 -14
- package/benchmark/auto-resolve/scripts/ship-gate.py +219 -50
- package/benchmark/auto-resolve/scripts/solo-ceiling-avoidance.py +53 -0
- package/benchmark/auto-resolve/scripts/solo-headroom-hypothesis.py +77 -0
- package/benchmark/auto-resolve/scripts/swebench-frozen-matrix.py +239 -26
- package/benchmark/auto-resolve/scripts/test-audit-headroom-rejections.sh +288 -0
- package/benchmark/auto-resolve/scripts/test-audit-pair-evidence.sh +1672 -0
- package/benchmark/auto-resolve/scripts/test-benchmark-arg-parsing.sh +933 -0
- package/benchmark/auto-resolve/scripts/test-build-pair-eligible-manifest.sh +491 -0
- package/benchmark/auto-resolve/scripts/test-check-f9-artifacts.sh +91 -0
- package/benchmark/auto-resolve/scripts/test-frozen-verify-gate.sh +328 -3
- package/benchmark/auto-resolve/scripts/test-full-pipeline-pair-gate.sh +497 -18
- package/benchmark/auto-resolve/scripts/test-headroom-gate.sh +331 -14
- package/benchmark/auto-resolve/scripts/test-iter-0033c-compare.sh +525 -0
- package/benchmark/auto-resolve/scripts/test-iter-0033c-l1-summary.sh +254 -0
- package/benchmark/auto-resolve/scripts/test-lint-fixtures.sh +580 -0
- package/benchmark/auto-resolve/scripts/test-pair-candidate-frontier.sh +591 -0
- package/benchmark/auto-resolve/scripts/test-run-full-pipeline-pair-candidate.sh +497 -0
- package/benchmark/auto-resolve/scripts/test-run-headroom-candidate.sh +401 -0
- package/benchmark/auto-resolve/scripts/test-run-swebench-solver-batch.sh +111 -0
- package/benchmark/auto-resolve/scripts/test-ship-gate.sh +1189 -0
- package/benchmark/auto-resolve/scripts/test-swebench-frozen-case.sh +924 -5
- package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/NOTES.md +28 -0
- package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/expected.json +63 -0
- package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/metadata.json +10 -0
- package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/setup.sh +3 -0
- package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/spec.md +47 -0
- package/benchmark/auto-resolve/shadow-fixtures/S1-cli-lang-flag/task.txt +1 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/NOTES.md +34 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/expected.json +53 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/metadata.json +10 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/setup.sh +3 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/spec.md +50 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/task.txt +1 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/verifiers/duplicate-order-error.js +27 -0
- package/benchmark/auto-resolve/shadow-fixtures/S2-cli-inventory-reservation/verifiers/priority-stock-reservation.js +44 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/NOTES.md +34 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/expected.json +55 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/metadata.json +10 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/setup.sh +3 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/spec.md +52 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/task.txt +1 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/verifiers/duplicate-ticket-error.js +29 -0
- package/benchmark/auto-resolve/shadow-fixtures/S3-cli-ticket-assignment/verifiers/priority-agent-assignment.js +48 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/NOTES.md +34 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/expected.json +55 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/metadata.json +10 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/setup.sh +3 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/spec.md +55 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/task.txt +1 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/duplicate-return-error.js +43 -0
- package/benchmark/auto-resolve/shadow-fixtures/S4-cli-return-routing/verifiers/priority-return-routing.js +70 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/NOTES.md +37 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/expected.json +54 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/metadata.json +10 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/setup.sh +3 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/spec.md +59 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/task.txt +1 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/credit-ledger-priority.js +98 -0
- package/benchmark/auto-resolve/shadow-fixtures/S5-cli-credit-grant-ledger/verifiers/duplicate-charge-error.js +38 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/NOTES.md +36 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/expected.json +56 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/metadata.json +10 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/setup.sh +3 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/spec.md +59 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/task.txt +1 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/verifiers/duplicate-refund-error.js +41 -0
- package/benchmark/auto-resolve/shadow-fixtures/S6-cli-refund-window-ledger/verifiers/priority-refund-ledger.js +65 -0
- package/bin/devlyn.js +221 -17
- package/config/skills/_shared/adapters/README.md +3 -0
- package/config/skills/_shared/adapters/gpt-5-5.md +5 -1
- package/config/skills/_shared/adapters/opus-4-7.md +9 -1
- package/config/skills/_shared/archive_run.py +78 -6
- package/config/skills/_shared/codex-config.md +5 -4
- package/config/skills/_shared/codex-monitored.sh +46 -1
- package/config/skills/_shared/collect-codex-findings.py +20 -5
- package/config/skills/_shared/engine-preflight.md +17 -13
- package/config/skills/_shared/runtime-principles.md +6 -9
- package/config/skills/_shared/spec-verify-check.py +2664 -107
- package/config/skills/_shared/verify-merge-findings.py +1369 -19
- package/config/skills/devlyn:design-ui/SKILL.md +364 -0
- package/config/skills/devlyn:ideate/SKILL.md +7 -4
- package/config/skills/devlyn:ideate/references/elicitation.md +50 -4
- package/config/skills/devlyn:ideate/references/from-spec-mode.md +26 -4
- package/config/skills/devlyn:ideate/references/project-mode.md +20 -1
- package/config/skills/devlyn:ideate/references/spec-template.md +10 -1
- package/config/skills/devlyn:resolve/SKILL.md +78 -26
- package/config/skills/devlyn:resolve/references/free-form-mode.md +15 -0
- package/config/skills/devlyn:resolve/references/phases/build-gate.md +2 -2
- package/config/skills/devlyn:resolve/references/phases/implement.md +1 -1
- package/config/skills/devlyn:resolve/references/phases/probe-derive.md +74 -2
- package/config/skills/devlyn:resolve/references/phases/verify.md +80 -29
- package/config/skills/devlyn:resolve/references/state-schema.md +9 -4
- package/package.json +47 -2
- package/scripts/lint-fixtures.sh +349 -0
- package/scripts/lint-shadow-fixtures.sh +58 -0
- package/scripts/lint-skills.sh +3645 -95
|
@@ -12,6 +12,7 @@ from __future__ import annotations
|
|
|
12
12
|
import argparse
|
|
13
13
|
import json
|
|
14
14
|
import pathlib
|
|
15
|
+
import re
|
|
15
16
|
import sys
|
|
16
17
|
import tempfile
|
|
17
18
|
from typing import Any
|
|
@@ -32,6 +33,54 @@ VERDICT_RANK = {
|
|
|
32
33
|
"BLOCKED": 3,
|
|
33
34
|
}
|
|
34
35
|
RANK_VERDICT = {0: "PASS", 1: "PASS_WITH_ISSUES", 2: "NEEDS_WORK", 3: "BLOCKED"}
|
|
36
|
+
# Skip-reason vocabulary for pair verification.
# NOTE(review): presumably the accepted values of pair_trigger.skipped_reason;
# the consumer of this set is outside this view - confirm.
ALLOWED_PAIR_SKIP_REASONS = {
    "user_no_pair",
    "mechanical_blocker",
    "primary_judge_blocker",
}

# Reason codes that is_known_pair_trigger_reason() accepts.
KNOWN_PAIR_TRIGGER_REASONS = {
    "mode.verify-only", "mode.pair-verify",
    "complexity.high", "complexity.large",
    "spec.complexity.high", "spec.complexity.large",
    "spec.solo_headroom_hypothesis",
    "risk.high",
    "risk_probes.enabled", "risk_probes.present",
    "coverage.failed",
    "mechanical.warning",
    "judge.warning",
}

# A spec line must mention "miss" plus one of these words before its
# backticked spans are inspected for a command.
OBSERVABLE_COMMAND_MARKERS = ("command", "observable", "expose")

# One inline-code span: backtick, at least one non-backtick/non-newline
# character, backtick.
BACKTICKED_TEXT_RE = re.compile(r"`[^`\n]+`")

# Backticked phrases that are never treated as commands.
RESERVED_BACKTICK_TERMS = {"solo-headroom hypothesis", "solo_claude", "miss"}

# First words that mark a backticked span as a shell/tool invocation.
COMMAND_PREFIXES = {
    "bash", "bun", "cargo", "git", "go",
    "jest", "make", "node", "npm", "pnpm",
    "printf", "pytest", "python", "python3", "ruff",
    "sh", "uv", "vitest", "yarn",
}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def reject_json_constant(token: str) -> None:
    """Always raise ``ValueError`` naming the offending JSON constant.

    ``loads_strict_json`` passes this function to ``json.loads`` as
    ``parse_constant``, so it never returns normally.

    Args:
        token: The constant text handed over by the JSON decoder.

    Raises:
        ValueError: Unconditionally.
    """
    message = f"invalid JSON numeric constant: {token}"
    raise ValueError(message)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def loads_strict_json(text: str) -> Any:
    """Decode ``text`` as JSON with ``reject_json_constant`` as the constant hook.

    Args:
        text: The JSON document to decode.

    Returns:
        Whatever ``json.loads`` produces for the document.

    Raises:
        ValueError: Raised by ``json.loads`` for invalid input, or by
            ``reject_json_constant`` when the decoder reports a constant.
    """
    decoded = json.loads(text, parse_constant=reject_json_constant)
    return decoded
|
|
35
84
|
|
|
36
85
|
|
|
37
86
|
def rank(verdict: str | None) -> int:
|
|
@@ -42,6 +91,18 @@ def worse(a: str | None, b: str | None) -> str:
|
|
|
42
91
|
return RANK_VERDICT[max(rank(a), rank(b))]
|
|
43
92
|
|
|
44
93
|
|
|
94
|
+
def is_known_pair_trigger_reason(reason: str) -> bool:
    """Return True when ``reason`` is a member of ``KNOWN_PAIR_TRIGGER_REASONS``."""
    recognised = KNOWN_PAIR_TRIGGER_REASONS
    return reason in recognised
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def has_known_pair_trigger_reason(reasons: list[str]) -> bool:
    """Return True when at least one entry of ``reasons`` is a known reason.

    An empty list yields False.
    """
    for candidate in reasons:
        if is_known_pair_trigger_reason(candidate):
            return True
    return False
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def all_known_pair_trigger_reasons(reasons: list[str]) -> bool:
    """Return True when every entry of ``reasons`` is a known reason.

    An empty list yields True (there is nothing unknown in it).
    """
    for candidate in reasons:
        if not is_known_pair_trigger_reason(candidate):
            return False
    return True
|
|
104
|
+
|
|
105
|
+
|
|
45
106
|
def finding_rank(finding: dict[str, Any]) -> int:
|
|
46
107
|
severity = str(finding.get("severity") or "").upper()
|
|
47
108
|
if severity in {"CRITICAL", "HIGH"}:
|
|
@@ -66,8 +127,8 @@ def read_findings(devlyn: pathlib.Path) -> tuple[list[dict[str, Any]], dict[str,
|
|
|
66
127
|
if not raw:
|
|
67
128
|
continue
|
|
68
129
|
try:
|
|
69
|
-
item =
|
|
70
|
-
except
|
|
130
|
+
item = loads_strict_json(raw)
|
|
131
|
+
except ValueError as exc:
|
|
71
132
|
blocked = {
|
|
72
133
|
"id": f"verify-merge-invalid-json-{name}-{line_no}",
|
|
73
134
|
"rule_id": "verify.findings.invalid-json",
|
|
@@ -102,16 +163,318 @@ def has_pair_findings(devlyn: pathlib.Path) -> bool:
|
|
|
102
163
|
return False
|
|
103
164
|
|
|
104
165
|
|
|
166
|
+
def pair_trigger_status(devlyn: pathlib.Path) -> tuple[bool, dict[str, Any] | None]:
    """Read ``pair_trigger`` from ``pipeline.state.json`` and validate it.

    The trigger is looked up at ``phases.verify.pair_trigger`` first and, if
    that yields nothing, at ``verify.pair_trigger``.

    Args:
        devlyn: Directory expected to contain ``pipeline.state.json``.

    Returns:
        ``(required, finding)``. ``required`` is True only for a well-formed
        trigger with ``eligible`` true. ``finding`` is None unless the state
        file or the trigger breaks the contract, in which case it is a dict
        with ``id``, ``message`` and ``file`` keys and ``required`` is False.
    """

    def invalid(finding_id: str, message: str) -> tuple[bool, dict[str, Any]]:
        # Every violation has the same shape and always points at the state file.
        return False, {"id": finding_id, "message": message, "file": "pipeline.state.json"}

    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return False, None
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return invalid(
            "verify-pair-trigger-state-malformed",
            "pipeline.state.json is malformed; cannot verify pair_trigger contract.",
        )
    if not isinstance(state, dict):
        # A non-object document cannot carry a trigger at either location.
        return False, None

    # Preferred location: phases.verify.pair_trigger.
    trigger = None
    phases = state.get("phases")
    if isinstance(phases, dict):
        verify_phase = phases.get("verify")
        if isinstance(verify_phase, dict):
            trigger = verify_phase.get("pair_trigger")
    # Fallback location: verify.pair_trigger at the top level.
    if trigger is None:
        top_level_verify = state.get("verify")
        if isinstance(top_level_verify, dict):
            trigger = top_level_verify.get("pair_trigger")
    if trigger is None:
        return False, None

    # Shape checks.
    if not isinstance(trigger, dict):
        return invalid("verify-pair-trigger-malformed", "pair_trigger must be an object.")
    eligible = trigger.get("eligible")
    if not isinstance(eligible, bool):
        return invalid(
            "verify-pair-trigger-eligible-malformed",
            "pair_trigger.eligible must be a boolean.",
        )
    reasons = trigger.get("reasons")
    if not isinstance(reasons, list) or any(not isinstance(item, str) for item in reasons):
        return invalid(
            "verify-pair-trigger-reasons-malformed",
            "pair_trigger.reasons must be a list of strings.",
        )
    skipped_reason = trigger.get("skipped_reason")
    if not (skipped_reason is None or isinstance(skipped_reason, str)):
        return invalid(
            "verify-pair-trigger-skipped-reason-malformed",
            "pair_trigger.skipped_reason must be a string or null.",
        )

    # Cross-field checks.
    if not eligible:
        if reasons:
            return invalid(
                "verify-pair-trigger-ineligible-reasons",
                "pair_trigger.reasons must be empty when pair_trigger.eligible is false.",
            )
        return False, None

    if not reasons:
        return invalid(
            "verify-pair-trigger-reasons-empty",
            "pair_trigger.eligible cannot be true with an empty reasons list.",
        )
    # Both unknown-reason checks share an id; only the message differs between
    # "no known reason at all" and "some unknown reason mixed in".
    if not has_known_pair_trigger_reason(reasons):
        return invalid(
            "verify-pair-trigger-reasons-unknown",
            "pair_trigger.reasons must include a known pair-trigger reason.",
        )
    if not all_known_pair_trigger_reasons(reasons):
        return invalid(
            "verify-pair-trigger-reasons-unknown",
            "pair_trigger.reasons must only include known pair-trigger reasons.",
        )
    if skipped_reason is not None:
        return invalid(
            "verify-pair-trigger-skip-contradiction",
            "pair_trigger.eligible cannot be true while skipped_reason is set.",
        )
    return True, None
|
|
247
|
+
|
|
248
|
+
|
|
105
249
|
def pair_trigger_required(devlyn: pathlib.Path) -> bool:
    """Return the ``required`` half of ``pair_trigger_status(devlyn)``.

    Any contract finding reported alongside it is discarded here.
    """
    return pair_trigger_status(devlyn)[0]
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def pair_trigger_present(devlyn: pathlib.Path) -> bool:
    """Report whether ``pipeline.state.json`` has a ``pair_trigger`` key.

    The key is searched in ``phases.verify`` and in the top-level ``verify``
    object; its value is not inspected. A missing or unparseable state file
    counts as "not present".
    """
    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return False
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return False
    if not isinstance(state, dict):
        return False

    phases = state.get("phases")
    containers = (
        phases.get("verify") if isinstance(phases, dict) else None,
        state.get("verify"),
    )
    return any(
        isinstance(container, dict) and "pair_trigger" in container
        for container in containers
    )
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def pair_flag_contract_violation(devlyn: pathlib.Path) -> dict[str, Any] | None:
    """Detect a state that both requests and disables pair verification.

    A finding is returned when ``pair_verify`` is exactly ``True`` while
    ``risk_profile.pair_default_enabled`` is exactly ``False``; its message
    describes this as ``--pair-verify`` and ``--no-pair`` being combined.

    Returns:
        The finding dict (``id``, ``message``, ``file``), or None when the
        state file is absent, unparseable, not an object, or not in conflict.
    """
    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return None
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return None
    if not isinstance(state, dict):
        return None
    if state.get("pair_verify") is not True:
        return None

    profile = state.get("risk_profile")
    pair_disabled = isinstance(profile, dict) and profile.get("pair_default_enabled") is False
    if not pair_disabled:
        return None
    return {
        "id": "verify-pair-trigger-conflicting-pair-flags",
        "message": "--pair-verify and --no-pair are mutually exclusive.",
        "file": "pipeline.state.json",
    }
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def risk_profile_contract_violation(devlyn: pathlib.Path) -> dict[str, Any] | None:
    """Validate the shape of ``risk_profile`` in ``pipeline.state.json``.

    Checks, in order: ``risk_profile`` is an object; each of ``high_risk``,
    ``risk_probes_enabled`` and ``pair_default_enabled`` is a boolean when
    present; ``reasons`` is a list of strings when present.

    Returns:
        A ``verify-risk-profile-malformed`` finding for the first failed
        check, or None when the file is absent, unparseable, not an object,
        has no ``risk_profile`` key, or passes every check.
    """

    def malformed(message: str) -> dict[str, Any]:
        # All risk_profile findings share one id and differ only in message.
        return {
            "id": "verify-risk-profile-malformed",
            "message": message,
            "file": "pipeline.state.json",
        }

    state_file = devlyn / "pipeline.state.json"
    if not state_file.is_file():
        return None
    try:
        state = loads_strict_json(state_file.read_text(encoding="utf-8"))
    except ValueError:
        return None
    if not isinstance(state, dict) or "risk_profile" not in state:
        return None

    profile = state["risk_profile"]
    if not isinstance(profile, dict):
        return malformed("risk_profile must be an object.")

    for flag in ("high_risk", "risk_probes_enabled", "pair_default_enabled"):
        if flag in profile and not isinstance(profile[flag], bool):
            return malformed(f"risk_profile.{flag} must be a boolean.")

    if "reasons" in profile:
        listed = profile["reasons"]
        if not isinstance(listed, list) or any(not isinstance(item, str) for item in listed):
            return malformed("risk_profile.reasons must be a list of strings.")
    return None
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def source_spec_text(state: dict[str, Any]) -> str | None:
|
|
330
|
+
source = state.get("source") if isinstance(state.get("source"), dict) else {}
|
|
331
|
+
for key in ("spec_path", "criteria_path"):
|
|
332
|
+
raw_path = source.get(key)
|
|
333
|
+
if not isinstance(raw_path, str) or not raw_path:
|
|
334
|
+
continue
|
|
335
|
+
path = pathlib.Path(raw_path)
|
|
336
|
+
if not path.is_absolute():
|
|
337
|
+
path = pathlib.Path.cwd() / path
|
|
338
|
+
try:
|
|
339
|
+
return path.read_text(encoding="utf-8")
|
|
340
|
+
except OSError:
|
|
341
|
+
continue
|
|
342
|
+
return None
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def spec_frontmatter_complexity(state: dict[str, Any]) -> str | None:
    """Extract the ``complexity`` value from the spec's leading ``---`` block.

    The spec text comes from ``source_spec_text(state)``. The block is the
    text between an opening ``---`` at the very start and the next line that
    begins with ``---``.

    Returns:
        The lower-cased value of the first ``complexity:`` line in that
        block, or None when there is no spec text, no such block, or no
        matching line.
    """
    spec = source_spec_text(state)
    if spec is None or not spec.startswith("---"):
        return None
    closing = spec.find("\n---", 3)
    if closing == -1:
        return None
    # An optional opening quote is skipped; the value is letters, "_" or "-".
    complexity_line = re.compile(r"\s*complexity\s*:\s*[\"']?([A-Za-z_-]+)")
    for row in spec[3:closing].splitlines():
        found = complexity_line.match(row)
        if found is not None:
            return found.group(1).lower()
    return None
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def spec_has_solo_headroom_hypothesis(state: dict[str, Any]) -> bool:
    """Check whether the spec text contains a solo-headroom hypothesis.

    True requires all of: the phrases ``solo-headroom hypothesis``,
    ``solo_claude`` and ``miss`` somewhere in the text (case-insensitive),
    and ``has_backticked_observable_command`` accepting the text.
    Returns False when no spec text can be read.
    """
    spec = source_spec_text(state)
    if spec is None:
        return False
    lowered = spec.lower()
    for phrase in ("solo-headroom hypothesis", "solo_claude", "miss"):
        if phrase not in lowered:
            return False
    return has_backticked_observable_command(spec)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def has_backticked_observable_command(text: str) -> bool:
    """Return True if some line pairs a "miss" claim with a backticked command.

    A line qualifies when, compared case-insensitively, it contains ``miss``
    and one of ``OBSERVABLE_COMMAND_MARKERS``, and at least one of its
    backticked spans satisfies ``is_command_like_backtick``.
    """
    for raw_line in text.splitlines():
        lowered = raw_line.lower()
        if "miss" not in lowered:
            continue
        if not any(marker in lowered for marker in OBSERVABLE_COMMAND_MARKERS):
            continue
        # The pattern has no groups, so findall yields the full spans,
        # backticks included.
        for span in BACKTICKED_TEXT_RE.findall(raw_line):
            if is_command_like_backtick(span.strip("`")):
                return True
    return False
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def is_command_like_backtick(value: str) -> bool:
    """Decide whether a backticked snippet looks like a runnable command.

    Empty snippets and snippets whose lowercased form is listed in
    ``RESERVED_BACKTICK_TERMS`` are rejected. Otherwise the snippet is
    accepted when its first whitespace-separated word (lowercased) is in
    ``COMMAND_PREFIXES``, when it ends with ``.js``/``.py``/``.sh``, or when
    it contains one of ``/``, ``$``, ``=``, ``|``, ``&&`` or ``;``.
    """
    candidate = value.strip()
    if not candidate:
        return False
    lowered = candidate.lower()
    if lowered in RESERVED_BACKTICK_TERMS:
        return False
    # candidate is non-empty after strip(), so split() yields at least one word.
    if lowered.split(maxsplit=1)[0] in COMMAND_PREFIXES:
        return True
    if candidate.endswith((".js", ".py", ".sh")):
        return True
    shell_markers = ("/", "$", "=", "|", "&&", ";")
    return any(marker in candidate for marker in shell_markers)
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def state_pair_trigger_reasons(
    devlyn: pathlib.Path,
    source_verdicts: dict[str, str],
) -> list[str]:
    """Collect the pair-trigger reason tokens implied by the pipeline state.

    Reads ``pipeline.state.json`` under ``devlyn`` and appends one token per
    applicable signal, in a fixed order: ``mode.verify-only``,
    ``mode.pair-verify``, ``complexity.<value>``, ``spec.complexity.<value>``,
    ``spec.solo_headroom_hypothesis``, ``risk.high``, ``risk_probes.enabled``,
    ``risk_probes.present``, ``coverage.failed``, ``mechanical.warning`` and
    ``judge.warning``.

    Args:
        devlyn: Directory holding ``pipeline.state.json`` and, optionally,
            ``risk-probes.jsonl``.
        source_verdicts: Verdict per source; only the ``mechanical`` and
            ``judge`` entries are consulted, via ``rank()``.

    Returns:
        The applicable reason tokens. The list is empty when the state file
        is missing, fails to parse (``ValueError``), or is not a JSON object.
    """
    state_path = devlyn / "pipeline.state.json"
    if not state_path.is_file():
        return []
    try:
        state = loads_strict_json(state_path.read_text(encoding="utf-8"))
    except ValueError:
        return []
    if not isinstance(state, dict):
        return []

    def _as_dict(value: Any) -> dict[str, Any]:
        # Malformed (non-object) sections are treated as absent.
        return value if isinstance(value, dict) else {}

    verify_phase = _as_dict(_as_dict(state.get("phases")).get("verify"))
    verify_state = _as_dict(state.get("verify"))
    risk_profile = _as_dict(state.get("risk_profile"))

    reasons: list[str] = []
    if state.get("mode") == "verify-only":
        reasons.append("mode.verify-only")
    if state.get("pair_verify") is True:
        reasons.append("mode.pair-verify")
    complexity = state.get("complexity")
    # Require a string before the membership test: a JSON list or object here
    # is unhashable and would raise TypeError against a set literal.
    if isinstance(complexity, str) and complexity in ("high", "large"):
        reasons.append(f"complexity.{complexity}")
    spec_complexity = spec_frontmatter_complexity(state)
    if spec_complexity in ("high", "large"):
        reasons.append(f"spec.complexity.{spec_complexity}")
    if spec_has_solo_headroom_hypothesis(state):
        reasons.append("spec.solo_headroom_hypothesis")
    if risk_profile.get("high_risk") is True:
        reasons.append("risk.high")
    if risk_profile.get("risk_probes_enabled") is True:
        reasons.append("risk_probes.enabled")
    if (devlyn / "risk-probes.jsonl").is_file():
        reasons.append("risk_probes.present")
    # The coverage flag may live in either the top-level or the per-phase verify record.
    if (
        verify_state.get("coverage_failed") is True
        or verify_phase.get("coverage_failed") is True
    ):
        reasons.append("coverage.failed")
    if rank(source_verdicts.get("mechanical")) == 1:
        reasons.append("mechanical.warning")
    if rank(source_verdicts.get("judge")) == 1:
        reasons.append("judge.warning")
    return reasons
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def pair_trigger_missing_contract_violation(
    devlyn: pathlib.Path,
    source_verdicts: dict[str, str],
) -> dict[str, Any] | None:
    """Build the violation reported when a needed ``pair_trigger`` is absent.

    Nothing is reported when ``rank()`` of the ``mechanical`` or ``judge``
    verdict is 2 or higher, or when ``state_pair_trigger_reasons`` finds no
    reason for a pair decision.

    Returns:
        A dict with ``id``, ``message`` and ``file`` keys describing the
        violation, or ``None`` when there is none.
    """
    already_blocking = any(
        rank(source_verdicts.get(source)) >= 2
        for source in ("mechanical", "judge")
    )
    if already_blocking:
        return None
    triggers = state_pair_trigger_reasons(devlyn, source_verdicts)
    if not triggers:
        return None
    detail = (
        "pair_trigger is missing even though VERIFY state requires a pair decision: "
        + ", ".join(triggers)
    )
    return {
        "id": "verify-pair-trigger-required-missing",
        "message": detail,
        "file": "pipeline.state.json",
    }
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def pair_trigger_skip_contract_violation(
|
|
466
|
+
devlyn: pathlib.Path,
|
|
467
|
+
source_verdicts: dict[str, str],
|
|
468
|
+
) -> dict[str, Any] | None:
|
|
469
|
+
state_path = devlyn / "pipeline.state.json"
|
|
470
|
+
if not state_path.is_file():
|
|
471
|
+
return None
|
|
472
|
+
try:
|
|
473
|
+
state = loads_strict_json(state_path.read_text(encoding="utf-8"))
|
|
474
|
+
except ValueError:
|
|
475
|
+
return None
|
|
476
|
+
phases = state.get("phases") if isinstance(state, dict) else {}
|
|
477
|
+
verify_phase = phases.get("verify") if isinstance(phases, dict) else None
|
|
115
478
|
trigger = None
|
|
116
479
|
if isinstance(verify_phase, dict):
|
|
117
480
|
trigger = verify_phase.get("pair_trigger")
|
|
@@ -119,11 +482,106 @@ def pair_trigger_required(devlyn: pathlib.Path) -> bool:
|
|
|
119
482
|
verify_state = state.get("verify")
|
|
120
483
|
if isinstance(verify_state, dict):
|
|
121
484
|
trigger = verify_state.get("pair_trigger")
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
485
|
+
if not isinstance(trigger, dict):
|
|
486
|
+
return None
|
|
487
|
+
skipped_reason = trigger.get("skipped_reason")
|
|
488
|
+
if trigger.get("eligible") is False and skipped_reason is None:
|
|
489
|
+
natural_reasons = state_pair_trigger_reasons(devlyn, source_verdicts)
|
|
490
|
+
if natural_reasons:
|
|
491
|
+
return {
|
|
492
|
+
"id": "verify-pair-trigger-ineligible-unjustified",
|
|
493
|
+
"message": (
|
|
494
|
+
"pair_trigger is ineligible without a skip reason even though "
|
|
495
|
+
"VERIFY state requires a pair decision: "
|
|
496
|
+
+ ", ".join(natural_reasons)
|
|
497
|
+
),
|
|
498
|
+
"file": "pipeline.state.json",
|
|
499
|
+
}
|
|
500
|
+
if skipped_reason is None:
|
|
501
|
+
return None
|
|
502
|
+
if skipped_reason not in ALLOWED_PAIR_SKIP_REASONS:
|
|
503
|
+
return {
|
|
504
|
+
"id": "verify-pair-trigger-skipped-reason-unsupported",
|
|
505
|
+
"message": (
|
|
506
|
+
"pair_trigger.skipped_reason must be user_no_pair, "
|
|
507
|
+
"mechanical_blocker, primary_judge_blocker, or null."
|
|
508
|
+
),
|
|
509
|
+
"file": "pipeline.state.json",
|
|
510
|
+
}
|
|
511
|
+
if skipped_reason == "user_no_pair":
|
|
512
|
+
risk_profile = state.get("risk_profile") if isinstance(state, dict) else {}
|
|
513
|
+
if not isinstance(risk_profile, dict) or risk_profile.get("pair_default_enabled") is not False:
|
|
514
|
+
return {
|
|
515
|
+
"id": "verify-pair-trigger-user-no-pair-unsupported",
|
|
516
|
+
"message": (
|
|
517
|
+
"pair_trigger skipped_reason user_no_pair requires "
|
|
518
|
+
"risk_profile.pair_default_enabled false from an explicit --no-pair opt-out."
|
|
519
|
+
),
|
|
520
|
+
"file": "pipeline.state.json",
|
|
521
|
+
}
|
|
522
|
+
if skipped_reason == "mechanical_blocker" and rank(source_verdicts.get("mechanical")) < 2:
|
|
523
|
+
return {
|
|
524
|
+
"id": "verify-pair-trigger-mechanical-blocker-unsupported",
|
|
525
|
+
"message": (
|
|
526
|
+
"pair_trigger skipped_reason mechanical_blocker requires a "
|
|
527
|
+
"verdict-binding MECHANICAL finding."
|
|
528
|
+
),
|
|
529
|
+
"file": "pipeline.state.json",
|
|
530
|
+
}
|
|
531
|
+
if skipped_reason == "primary_judge_blocker" and rank(source_verdicts.get("judge")) < 2:
|
|
532
|
+
return {
|
|
533
|
+
"id": "verify-pair-trigger-primary-judge-blocker-unsupported",
|
|
534
|
+
"message": (
|
|
535
|
+
"pair_trigger skipped_reason primary_judge_blocker requires a "
|
|
536
|
+
"verdict-binding primary JUDGE finding."
|
|
537
|
+
),
|
|
538
|
+
"file": "pipeline.state.json",
|
|
539
|
+
}
|
|
540
|
+
return None
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
def pair_trigger_reason_completeness_violation(
    devlyn: pathlib.Path,
    source_verdicts: dict[str, str],
) -> dict[str, Any] | None:
    """Flag an eligible ``pair_trigger`` whose ``reasons`` omit applicable ones.

    The trigger is read from ``phases.verify.pair_trigger`` in
    ``pipeline.state.json``, falling back to ``verify.pair_trigger``. The
    check only applies when ``eligible`` is exactly ``True`` and ``reasons``
    is a list of strings. Every reason from ``state_pair_trigger_reasons``
    must then appear in that list.

    Returns:
        A dict with ``id``, ``message`` and ``file`` keys naming the missing
        reasons, or ``None`` when the check does not apply or nothing is
        missing. Also ``None`` when ``rank()`` of the ``mechanical`` or
        ``judge`` verdict is 2 or higher, or the state file is absent or
        unparsable.
    """
    for source in ("mechanical", "judge"):
        if rank(source_verdicts.get(source)) >= 2:
            return None

    state_path = devlyn / "pipeline.state.json"
    if not state_path.is_file():
        return None
    try:
        state = loads_strict_json(state_path.read_text(encoding="utf-8"))
    except ValueError:
        return None
    if not isinstance(state, dict):
        # A non-object state cannot carry a pair_trigger anywhere.
        return None

    phases = state.get("phases")
    verify_phase = phases.get("verify") if isinstance(phases, dict) else None
    trigger = verify_phase.get("pair_trigger") if isinstance(verify_phase, dict) else None
    if trigger is None:
        # Fall back to the top-level verify record.
        verify_state = state.get("verify")
        if isinstance(verify_state, dict):
            trigger = verify_state.get("pair_trigger")

    if not isinstance(trigger, dict) or trigger.get("eligible") is not True:
        return None
    declared = trigger.get("reasons")
    if not isinstance(declared, list):
        return None
    if any(not isinstance(item, str) for item in declared):
        return None

    absent = [
        expected
        for expected in state_pair_trigger_reasons(devlyn, source_verdicts)
        if expected not in declared
    ]
    if not absent:
        return None
    detail = (
        "pair_trigger.reasons is missing applicable canonical reason(s): "
        + ", ".join(absent)
    )
    return {
        "id": "verify-pair-trigger-reasons-incomplete",
        "message": detail,
        "file": "pipeline.state.json",
    }
|
|
127
585
|
|
|
128
586
|
|
|
129
587
|
def pair_blocker(id_: str, message: str, file_: str | None = None) -> dict[str, Any]:
|
|
@@ -145,10 +603,71 @@ def detect_pair_stdout_contract_violations(
|
|
|
145
603
|
source_verdicts: dict[str, str],
|
|
146
604
|
) -> list[dict[str, Any]]:
|
|
147
605
|
stdout_path = devlyn / "codex-judge.stdout"
|
|
606
|
+
flag_violation = pair_flag_contract_violation(devlyn)
|
|
607
|
+
if flag_violation is not None:
|
|
608
|
+
source_verdicts["pair_judge"] = "BLOCKED"
|
|
609
|
+
return [
|
|
610
|
+
pair_blocker(
|
|
611
|
+
flag_violation["id"],
|
|
612
|
+
flag_violation["message"],
|
|
613
|
+
flag_violation["file"],
|
|
614
|
+
)
|
|
615
|
+
]
|
|
616
|
+
required, malformed_trigger = pair_trigger_status(devlyn)
|
|
617
|
+
if malformed_trigger is not None:
|
|
618
|
+
source_verdicts["pair_judge"] = "BLOCKED"
|
|
619
|
+
return [
|
|
620
|
+
pair_blocker(
|
|
621
|
+
malformed_trigger["id"],
|
|
622
|
+
malformed_trigger["message"],
|
|
623
|
+
malformed_trigger["file"],
|
|
624
|
+
)
|
|
625
|
+
]
|
|
626
|
+
risk_profile_violation = risk_profile_contract_violation(devlyn)
|
|
627
|
+
if risk_profile_violation is not None:
|
|
628
|
+
source_verdicts["pair_judge"] = "BLOCKED"
|
|
629
|
+
return [
|
|
630
|
+
pair_blocker(
|
|
631
|
+
risk_profile_violation["id"],
|
|
632
|
+
risk_profile_violation["message"],
|
|
633
|
+
risk_profile_violation["file"],
|
|
634
|
+
)
|
|
635
|
+
]
|
|
636
|
+
if not required and not pair_trigger_present(devlyn):
|
|
637
|
+
missing_violation = pair_trigger_missing_contract_violation(devlyn, source_verdicts)
|
|
638
|
+
if missing_violation is not None:
|
|
639
|
+
source_verdicts["pair_judge"] = "BLOCKED"
|
|
640
|
+
return [
|
|
641
|
+
pair_blocker(
|
|
642
|
+
missing_violation["id"],
|
|
643
|
+
missing_violation["message"],
|
|
644
|
+
missing_violation["file"],
|
|
645
|
+
)
|
|
646
|
+
]
|
|
647
|
+
skip_violation = pair_trigger_skip_contract_violation(devlyn, source_verdicts)
|
|
648
|
+
if skip_violation is not None:
|
|
649
|
+
source_verdicts["pair_judge"] = "BLOCKED"
|
|
650
|
+
return [
|
|
651
|
+
pair_blocker(
|
|
652
|
+
skip_violation["id"],
|
|
653
|
+
skip_violation["message"],
|
|
654
|
+
skip_violation["file"],
|
|
655
|
+
)
|
|
656
|
+
]
|
|
657
|
+
reason_violation = pair_trigger_reason_completeness_violation(devlyn, source_verdicts)
|
|
658
|
+
if reason_violation is not None:
|
|
659
|
+
source_verdicts["pair_judge"] = "BLOCKED"
|
|
660
|
+
return [
|
|
661
|
+
pair_blocker(
|
|
662
|
+
reason_violation["id"],
|
|
663
|
+
reason_violation["message"],
|
|
664
|
+
reason_violation["file"],
|
|
665
|
+
)
|
|
666
|
+
]
|
|
148
667
|
if has_pair_findings(devlyn):
|
|
149
668
|
return []
|
|
150
669
|
if not stdout_path.is_file():
|
|
151
|
-
if
|
|
670
|
+
if required:
|
|
152
671
|
source_verdicts["pair_judge"] = "BLOCKED"
|
|
153
672
|
return [
|
|
154
673
|
pair_blocker(
|
|
@@ -176,8 +695,8 @@ def detect_pair_stdout_contract_violations(
|
|
|
176
695
|
continue
|
|
177
696
|
if raw.startswith("# SUMMARY "):
|
|
178
697
|
try:
|
|
179
|
-
summary =
|
|
180
|
-
except
|
|
698
|
+
summary = loads_strict_json(raw.removeprefix("# SUMMARY ").strip())
|
|
699
|
+
except ValueError:
|
|
181
700
|
continue
|
|
182
701
|
if summary.get("verdict") in {"NEEDS_WORK", "FAIL", "BLOCKED"}:
|
|
183
702
|
has_nonpass_summary = True
|
|
@@ -185,8 +704,8 @@ def detect_pair_stdout_contract_violations(
|
|
|
185
704
|
if raw.startswith("#"):
|
|
186
705
|
continue
|
|
187
706
|
try:
|
|
188
|
-
item =
|
|
189
|
-
except
|
|
707
|
+
item = loads_strict_json(raw)
|
|
708
|
+
except ValueError:
|
|
190
709
|
continue
|
|
191
710
|
if isinstance(item, dict) and str(item.get("severity") or "").upper() in {
|
|
192
711
|
"CRITICAL",
|
|
@@ -237,7 +756,7 @@ def write_state(devlyn: pathlib.Path, summary: dict[str, Any]) -> None:
|
|
|
237
756
|
state_path = devlyn / "pipeline.state.json"
|
|
238
757
|
if not state_path.is_file():
|
|
239
758
|
raise SystemExit(f"error: {state_path} not found")
|
|
240
|
-
state =
|
|
759
|
+
state = loads_strict_json(state_path.read_text(encoding="utf-8"))
|
|
241
760
|
phases = state.setdefault("phases", {})
|
|
242
761
|
verify = phases.get("verify")
|
|
243
762
|
if not isinstance(verify, dict):
|
|
@@ -260,7 +779,19 @@ def self_test() -> int:
|
|
|
260
779
|
with tempfile.TemporaryDirectory() as tmp:
|
|
261
780
|
devlyn = pathlib.Path(tmp)
|
|
262
781
|
(devlyn / "pipeline.state.json").write_text(
|
|
263
|
-
json.dumps({
|
|
782
|
+
json.dumps({
|
|
783
|
+
"phases": {
|
|
784
|
+
"verify": {
|
|
785
|
+
"verdict": "PASS",
|
|
786
|
+
"sub_verdicts": {},
|
|
787
|
+
"pair_trigger": {
|
|
788
|
+
"eligible": True,
|
|
789
|
+
"reasons": ["risk.high", "judge.warning"],
|
|
790
|
+
"skipped_reason": None,
|
|
791
|
+
},
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
}),
|
|
264
795
|
encoding="utf-8",
|
|
265
796
|
)
|
|
266
797
|
(devlyn / "verify.findings.jsonl").write_text(
|
|
@@ -274,17 +805,33 @@ def self_test() -> int:
|
|
|
274
805
|
findings, source_verdicts = read_findings(devlyn)
|
|
275
806
|
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
276
807
|
write_state(devlyn, summary)
|
|
277
|
-
state =
|
|
808
|
+
state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
|
|
278
809
|
assert summary["verdict"] == "NEEDS_WORK", summary
|
|
279
810
|
assert state["phases"]["verify"]["verdict"] == "NEEDS_WORK", state
|
|
280
811
|
assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "NEEDS_WORK", state
|
|
281
812
|
assert (devlyn / "verify-merged.findings.jsonl").read_text(encoding="utf-8")
|
|
813
|
+
(devlyn / "verify.findings.jsonl").write_text(
|
|
814
|
+
'{"id":"nan","severity":NaN}\n',
|
|
815
|
+
encoding="utf-8",
|
|
816
|
+
)
|
|
817
|
+
(devlyn / "verify.pair.findings.jsonl").write_text("", encoding="utf-8")
|
|
818
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
819
|
+
assert source_verdicts["judge"] == "BLOCKED", source_verdicts
|
|
820
|
+
assert any(
|
|
821
|
+
finding.get("id") == "verify-merge-invalid-json-verify.findings.jsonl-1"
|
|
822
|
+
and "invalid JSON numeric constant: NaN" in finding.get("message", "")
|
|
823
|
+
for finding in findings
|
|
824
|
+
), findings
|
|
825
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
826
|
+
json.dumps({"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}}}),
|
|
827
|
+
encoding="utf-8",
|
|
828
|
+
)
|
|
282
829
|
(devlyn / "verify.findings.jsonl").write_text("", encoding="utf-8")
|
|
283
830
|
(devlyn / "verify.pair.findings.jsonl").write_text("", encoding="utf-8")
|
|
284
831
|
findings, source_verdicts = read_findings(devlyn)
|
|
285
832
|
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
286
833
|
write_state(devlyn, summary)
|
|
287
|
-
state =
|
|
834
|
+
state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
|
|
288
835
|
assert summary["verdict"] == "PASS", summary
|
|
289
836
|
assert state["phases"]["verify"]["verdict"] == "PASS", state
|
|
290
837
|
assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "PASS", state
|
|
@@ -296,9 +843,812 @@ def self_test() -> int:
|
|
|
296
843
|
findings, source_verdicts = read_findings(devlyn)
|
|
297
844
|
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
298
845
|
write_state(devlyn, summary)
|
|
299
|
-
state =
|
|
846
|
+
state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
|
|
847
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
848
|
+
assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "BLOCKED", state
|
|
849
|
+
|
|
850
|
+
(devlyn / "codex-judge.stdout").unlink()
|
|
851
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
852
|
+
json.dumps({
|
|
853
|
+
"phases": {
|
|
854
|
+
"verify": {
|
|
855
|
+
"verdict": "PASS",
|
|
856
|
+
"sub_verdicts": {},
|
|
857
|
+
"pair_trigger": {
|
|
858
|
+
"eligible": True,
|
|
859
|
+
"reasons": ["risk.high"],
|
|
860
|
+
"skipped_reason": None,
|
|
861
|
+
},
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
}),
|
|
865
|
+
encoding="utf-8",
|
|
866
|
+
)
|
|
867
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
868
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
869
|
+
write_state(devlyn, summary)
|
|
870
|
+
state = loads_strict_json((devlyn / "pipeline.state.json").read_text(encoding="utf-8"))
|
|
300
871
|
assert summary["verdict"] == "BLOCKED", summary
|
|
301
872
|
assert state["phases"]["verify"]["sub_verdicts"]["pair_judge"] == "BLOCKED", state
|
|
873
|
+
assert any(
|
|
874
|
+
finding.get("id") == "verify-pair-required-output-missing"
|
|
875
|
+
for finding in findings
|
|
876
|
+
), findings
|
|
877
|
+
|
|
878
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
879
|
+
json.dumps({
|
|
880
|
+
"mode": "spec",
|
|
881
|
+
"risk_profile": {
|
|
882
|
+
"high_risk": True,
|
|
883
|
+
"risk_probes_enabled": True,
|
|
884
|
+
"pair_default_enabled": True,
|
|
885
|
+
},
|
|
886
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
887
|
+
}),
|
|
888
|
+
encoding="utf-8",
|
|
889
|
+
)
|
|
890
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
891
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
892
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
893
|
+
assert any(
|
|
894
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
895
|
+
for finding in findings
|
|
896
|
+
), findings
|
|
897
|
+
|
|
898
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
899
|
+
json.dumps({
|
|
900
|
+
"mode": "spec",
|
|
901
|
+
"risk_profile": "enabled",
|
|
902
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
903
|
+
}),
|
|
904
|
+
encoding="utf-8",
|
|
905
|
+
)
|
|
906
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
907
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
908
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
909
|
+
assert any(
|
|
910
|
+
finding.get("id") == "verify-risk-profile-malformed"
|
|
911
|
+
and "risk_profile must be an object" in str(finding.get("message"))
|
|
912
|
+
for finding in findings
|
|
913
|
+
), findings
|
|
914
|
+
|
|
915
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
916
|
+
json.dumps({
|
|
917
|
+
"mode": "spec",
|
|
918
|
+
"risk_profile": {
|
|
919
|
+
"high_risk": True,
|
|
920
|
+
"risk_probes_enabled": "true",
|
|
921
|
+
"pair_default_enabled": True,
|
|
922
|
+
},
|
|
923
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
924
|
+
}),
|
|
925
|
+
encoding="utf-8",
|
|
926
|
+
)
|
|
927
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
928
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
929
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
930
|
+
assert any(
|
|
931
|
+
finding.get("id") == "verify-risk-profile-malformed"
|
|
932
|
+
and "risk_profile.risk_probes_enabled must be a boolean" in str(finding.get("message"))
|
|
933
|
+
for finding in findings
|
|
934
|
+
), findings
|
|
935
|
+
|
|
936
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
937
|
+
json.dumps({
|
|
938
|
+
"mode": "spec",
|
|
939
|
+
"risk_profile": {
|
|
940
|
+
"high_risk": True,
|
|
941
|
+
"risk_probes_enabled": False,
|
|
942
|
+
"pair_default_enabled": True,
|
|
943
|
+
"reasons": ["explicit", 3],
|
|
944
|
+
},
|
|
945
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
946
|
+
}),
|
|
947
|
+
encoding="utf-8",
|
|
948
|
+
)
|
|
949
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
950
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
951
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
952
|
+
assert any(
|
|
953
|
+
finding.get("id") == "verify-risk-profile-malformed"
|
|
954
|
+
and "risk_profile.reasons must be a list of strings" in str(finding.get("message"))
|
|
955
|
+
for finding in findings
|
|
956
|
+
), findings
|
|
957
|
+
|
|
958
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
959
|
+
json.dumps({
|
|
960
|
+
"mode": "spec",
|
|
961
|
+
"pair_verify": True,
|
|
962
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
963
|
+
}),
|
|
964
|
+
encoding="utf-8",
|
|
965
|
+
)
|
|
966
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
967
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
968
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
969
|
+
assert any(
|
|
970
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
971
|
+
and "mode.pair-verify" in finding.get("message", "")
|
|
972
|
+
for finding in findings
|
|
973
|
+
), findings
|
|
974
|
+
|
|
975
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
976
|
+
json.dumps({
|
|
977
|
+
"mode": "spec",
|
|
978
|
+
"complexity": "large",
|
|
979
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
980
|
+
}),
|
|
981
|
+
encoding="utf-8",
|
|
982
|
+
)
|
|
983
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
984
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
985
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
986
|
+
assert any(
|
|
987
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
988
|
+
and "complexity.large" in str(finding.get("message"))
|
|
989
|
+
for finding in findings
|
|
990
|
+
), findings
|
|
991
|
+
|
|
992
|
+
spec_path = devlyn / "spec.md"
|
|
993
|
+
spec_path.write_text(
|
|
994
|
+
'---\nid: "spec-high"\ncomplexity: high\n---\n\n# Spec\n',
|
|
995
|
+
encoding="utf-8",
|
|
996
|
+
)
|
|
997
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
998
|
+
json.dumps({
|
|
999
|
+
"mode": "spec",
|
|
1000
|
+
"source": {"spec_path": str(spec_path)},
|
|
1001
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
1002
|
+
}),
|
|
1003
|
+
encoding="utf-8",
|
|
1004
|
+
)
|
|
1005
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1006
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1007
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1008
|
+
assert any(
|
|
1009
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
1010
|
+
and "spec.complexity.high" in str(finding.get("message"))
|
|
1011
|
+
for finding in findings
|
|
1012
|
+
), findings
|
|
1013
|
+
|
|
1014
|
+
spec_path.write_text(
|
|
1015
|
+
'---\nid: "spec-large"\ncomplexity: large\n---\n\n# Spec\n',
|
|
1016
|
+
encoding="utf-8",
|
|
1017
|
+
)
|
|
1018
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1019
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1020
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1021
|
+
assert any(
|
|
1022
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
1023
|
+
and "spec.complexity.large" in str(finding.get("message"))
|
|
1024
|
+
for finding in findings
|
|
1025
|
+
), findings
|
|
1026
|
+
|
|
1027
|
+
spec_path.write_text(
|
|
1028
|
+
"# Spec\n\n## Context\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior; implementation token `rollback`.\n",
|
|
1029
|
+
encoding="utf-8",
|
|
1030
|
+
)
|
|
1031
|
+
assert spec_has_solo_headroom_hypothesis(
|
|
1032
|
+
{"source": {"spec_path": str(spec_path)}}
|
|
1033
|
+
) is False
|
|
1034
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1035
|
+
json.dumps({
|
|
1036
|
+
"mode": "spec",
|
|
1037
|
+
"source": {"spec_path": str(spec_path)},
|
|
1038
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
1039
|
+
}),
|
|
1040
|
+
encoding="utf-8",
|
|
1041
|
+
)
|
|
1042
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1043
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1044
|
+
assert summary["verdict"] == "PASS", summary
|
|
1045
|
+
assert not any(
|
|
1046
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
1047
|
+
and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
|
|
1048
|
+
for finding in findings
|
|
1049
|
+
), findings
|
|
1050
|
+
|
|
1051
|
+
spec_path.write_text(
|
|
1052
|
+
"# Spec\n\n## Context\n\nsolo-headroom hypothesis: solo_claude should miss the priority rollback behavior.\nObservable command: `node check.js` exposes behavior.\n",
|
|
1053
|
+
encoding="utf-8",
|
|
1054
|
+
)
|
|
1055
|
+
assert spec_has_solo_headroom_hypothesis(
|
|
1056
|
+
{"source": {"spec_path": str(spec_path)}}
|
|
1057
|
+
) is False
|
|
1058
|
+
|
|
1059
|
+
spec_path.write_text(
|
|
1060
|
+
"# Spec\n\n## Context\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior; observable `SOLO_CLAUDE` exposes the miss.\n",
|
|
1061
|
+
encoding="utf-8",
|
|
1062
|
+
)
|
|
1063
|
+
assert spec_has_solo_headroom_hypothesis(
|
|
1064
|
+
{"source": {"spec_path": str(spec_path)}}
|
|
1065
|
+
) is False
|
|
1066
|
+
|
|
1067
|
+
spec_path.write_text(
|
|
1068
|
+
"# Spec\n\n## Context\n\nsolo-headroom hypothesis: solo_claude should miss behavior where observable `priority rollback` exposes the miss.\n",
|
|
1069
|
+
encoding="utf-8",
|
|
1070
|
+
)
|
|
1071
|
+
assert spec_has_solo_headroom_hypothesis(
|
|
1072
|
+
{"source": {"spec_path": str(spec_path)}}
|
|
1073
|
+
) is False
|
|
1074
|
+
|
|
1075
|
+
spec_path.write_text(
|
|
1076
|
+
"# Spec\n\n## Context\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior exposed by `node check.js`.\n",
|
|
1077
|
+
encoding="utf-8",
|
|
1078
|
+
)
|
|
1079
|
+
assert spec_has_solo_headroom_hypothesis(
|
|
1080
|
+
{"source": {"spec_path": str(spec_path)}}
|
|
1081
|
+
) is True
|
|
1082
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1083
|
+
json.dumps({
|
|
1084
|
+
"mode": "spec",
|
|
1085
|
+
"source": {"spec_path": str(spec_path)},
|
|
1086
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
1087
|
+
}),
|
|
1088
|
+
encoding="utf-8",
|
|
1089
|
+
)
|
|
1090
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1091
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1092
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1093
|
+
assert any(
|
|
1094
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
1095
|
+
and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
|
|
1096
|
+
for finding in findings
|
|
1097
|
+
), findings
|
|
1098
|
+
|
|
1099
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1100
|
+
json.dumps({
|
|
1101
|
+
"mode": "spec",
|
|
1102
|
+
"source": {"spec_path": str(spec_path)},
|
|
1103
|
+
"risk_profile": {
|
|
1104
|
+
"high_risk": True,
|
|
1105
|
+
"risk_probes_enabled": False,
|
|
1106
|
+
"pair_default_enabled": True,
|
|
1107
|
+
},
|
|
1108
|
+
"phases": {
|
|
1109
|
+
"verify": {
|
|
1110
|
+
"verdict": "PASS",
|
|
1111
|
+
"sub_verdicts": {},
|
|
1112
|
+
"pair_trigger": {
|
|
1113
|
+
"eligible": True,
|
|
1114
|
+
"reasons": ["risk.high"],
|
|
1115
|
+
"skipped_reason": None,
|
|
1116
|
+
},
|
|
1117
|
+
}
|
|
1118
|
+
},
|
|
1119
|
+
}),
|
|
1120
|
+
encoding="utf-8",
|
|
1121
|
+
)
|
|
1122
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1123
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1124
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1125
|
+
assert any(
|
|
1126
|
+
finding.get("id") == "verify-pair-trigger-reasons-incomplete"
|
|
1127
|
+
and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
|
|
1128
|
+
for finding in findings
|
|
1129
|
+
), findings
|
|
1130
|
+
|
|
1131
|
+
criteria_path = devlyn / "criteria.generated.md"
|
|
1132
|
+
criteria_path.write_text(
|
|
1133
|
+
"# Criteria\n\nsolo-headroom hypothesis: `SOLO_CLAUDE` should miss the priority rollback behavior exposed by `node check.js`.\n",
|
|
1134
|
+
encoding="utf-8",
|
|
1135
|
+
)
|
|
1136
|
+
assert spec_has_solo_headroom_hypothesis(
|
|
1137
|
+
{"source": {"criteria_path": str(criteria_path)}}
|
|
1138
|
+
) is True
|
|
1139
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1140
|
+
json.dumps({
|
|
1141
|
+
"mode": "free-form",
|
|
1142
|
+
"source": {"criteria_path": str(criteria_path)},
|
|
1143
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
1144
|
+
}),
|
|
1145
|
+
encoding="utf-8",
|
|
1146
|
+
)
|
|
1147
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1148
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1149
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1150
|
+
assert any(
|
|
1151
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
1152
|
+
and "spec.solo_headroom_hypothesis" in str(finding.get("message"))
|
|
1153
|
+
for finding in findings
|
|
1154
|
+
), findings
|
|
1155
|
+
|
|
1156
|
+
(devlyn / "verify-mechanical.findings.jsonl").write_text(
|
|
1157
|
+
json.dumps({"id": "m0", "severity": "HIGH"}) + "\n",
|
|
1158
|
+
encoding="utf-8",
|
|
1159
|
+
)
|
|
1160
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1161
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1162
|
+
assert summary["verdict"] == "NEEDS_WORK", summary
|
|
1163
|
+
assert not any(
|
|
1164
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
1165
|
+
for finding in findings
|
|
1166
|
+
), findings
|
|
1167
|
+
(devlyn / "verify-mechanical.findings.jsonl").write_text("", encoding="utf-8")
|
|
1168
|
+
|
|
1169
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1170
|
+
json.dumps({
|
|
1171
|
+
"phases": {
|
|
1172
|
+
"verify": {
|
|
1173
|
+
"verdict": "PASS",
|
|
1174
|
+
"sub_verdicts": {},
|
|
1175
|
+
"pair_trigger": {
|
|
1176
|
+
"eligible": "true",
|
|
1177
|
+
"reasons": ["risk.high"],
|
|
1178
|
+
"skipped_reason": None,
|
|
1179
|
+
},
|
|
1180
|
+
}
|
|
1181
|
+
}
|
|
1182
|
+
}),
|
|
1183
|
+
encoding="utf-8",
|
|
1184
|
+
)
|
|
1185
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1186
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1187
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1188
|
+
assert any(
|
|
1189
|
+
finding.get("id") == "verify-pair-trigger-eligible-malformed"
|
|
1190
|
+
for finding in findings
|
|
1191
|
+
), findings
|
|
1192
|
+
|
|
1193
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1194
|
+
json.dumps({
|
|
1195
|
+
"phases": {
|
|
1196
|
+
"verify": {
|
|
1197
|
+
"verdict": "PASS",
|
|
1198
|
+
"sub_verdicts": {},
|
|
1199
|
+
"pair_trigger": {
|
|
1200
|
+
"eligible": True,
|
|
1201
|
+
"reasons": "risk.high",
|
|
1202
|
+
"skipped_reason": None,
|
|
1203
|
+
},
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
}),
|
|
1207
|
+
encoding="utf-8",
|
|
1208
|
+
)
|
|
1209
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1210
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1211
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1212
|
+
assert any(
|
|
1213
|
+
finding.get("id") == "verify-pair-trigger-reasons-malformed"
|
|
1214
|
+
for finding in findings
|
|
1215
|
+
), findings
|
|
1216
|
+
|
|
1217
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1218
|
+
json.dumps({
|
|
1219
|
+
"phases": {
|
|
1220
|
+
"verify": {
|
|
1221
|
+
"verdict": "PASS",
|
|
1222
|
+
"sub_verdicts": {},
|
|
1223
|
+
"pair_trigger": {
|
|
1224
|
+
"eligible": True,
|
|
1225
|
+
"reasons": ["risk.high", "looks-hard"],
|
|
1226
|
+
"skipped_reason": None,
|
|
1227
|
+
},
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
}),
|
|
1231
|
+
encoding="utf-8",
|
|
1232
|
+
)
|
|
1233
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1234
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1235
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1236
|
+
assert any(
|
|
1237
|
+
finding.get("id") == "verify-pair-trigger-reasons-unknown"
|
|
1238
|
+
and "only include known" in finding.get("message", "")
|
|
1239
|
+
for finding in findings
|
|
1240
|
+
), findings
|
|
1241
|
+
|
|
1242
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1243
|
+
json.dumps({
|
|
1244
|
+
"phases": {
|
|
1245
|
+
"verify": {
|
|
1246
|
+
"verdict": "PASS",
|
|
1247
|
+
"sub_verdicts": {},
|
|
1248
|
+
"pair_trigger": {
|
|
1249
|
+
"eligible": True,
|
|
1250
|
+
"reasons": ["risk high"],
|
|
1251
|
+
"skipped_reason": None,
|
|
1252
|
+
},
|
|
1253
|
+
}
|
|
1254
|
+
}
|
|
1255
|
+
}),
|
|
1256
|
+
encoding="utf-8",
|
|
1257
|
+
)
|
|
1258
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1259
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1260
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1261
|
+
assert any(
|
|
1262
|
+
finding.get("id") == "verify-pair-trigger-reasons-unknown"
|
|
1263
|
+
and "include a known" in finding.get("message", "")
|
|
1264
|
+
for finding in findings
|
|
1265
|
+
), findings
|
|
1266
|
+
|
|
1267
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1268
|
+
json.dumps({
|
|
1269
|
+
"phases": {
|
|
1270
|
+
"verify": {
|
|
1271
|
+
"verdict": "PASS",
|
|
1272
|
+
"sub_verdicts": {},
|
|
1273
|
+
"pair_trigger": {
|
|
1274
|
+
"eligible": True,
|
|
1275
|
+
"reasons": ["risk_profile.high_risk", "risk_probes_enabled"],
|
|
1276
|
+
"skipped_reason": None,
|
|
1277
|
+
},
|
|
1278
|
+
}
|
|
1279
|
+
}
|
|
1280
|
+
}),
|
|
1281
|
+
encoding="utf-8",
|
|
1282
|
+
)
|
|
1283
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1284
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1285
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1286
|
+
assert any(
|
|
1287
|
+
finding.get("id") == "verify-pair-trigger-reasons-unknown"
|
|
1288
|
+
and "include a known" in finding.get("message", "")
|
|
1289
|
+
for finding in findings
|
|
1290
|
+
), findings
|
|
1291
|
+
|
|
1292
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1293
|
+
json.dumps({
|
|
1294
|
+
"phases": {
|
|
1295
|
+
"verify": {
|
|
1296
|
+
"verdict": "PASS",
|
|
1297
|
+
"sub_verdicts": {},
|
|
1298
|
+
"pair_trigger": {
|
|
1299
|
+
"eligible": True,
|
|
1300
|
+
"reasons": ["risk.high", 3],
|
|
1301
|
+
"skipped_reason": None,
|
|
1302
|
+
},
|
|
1303
|
+
}
|
|
1304
|
+
}
|
|
1305
|
+
}),
|
|
1306
|
+
encoding="utf-8",
|
|
1307
|
+
)
|
|
1308
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1309
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1310
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1311
|
+
assert any(
|
|
1312
|
+
finding.get("id") == "verify-pair-trigger-reasons-malformed"
|
|
1313
|
+
for finding in findings
|
|
1314
|
+
), findings
|
|
1315
|
+
|
|
1316
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1317
|
+
json.dumps({
|
|
1318
|
+
"phases": {
|
|
1319
|
+
"verify": {
|
|
1320
|
+
"verdict": "PASS",
|
|
1321
|
+
"sub_verdicts": {},
|
|
1322
|
+
"pair_trigger": {
|
|
1323
|
+
"eligible": True,
|
|
1324
|
+
"reasons": [],
|
|
1325
|
+
"skipped_reason": None,
|
|
1326
|
+
},
|
|
1327
|
+
}
|
|
1328
|
+
}
|
|
1329
|
+
}),
|
|
1330
|
+
encoding="utf-8",
|
|
1331
|
+
)
|
|
1332
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1333
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1334
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1335
|
+
assert any(
|
|
1336
|
+
finding.get("id") == "verify-pair-trigger-reasons-empty"
|
|
1337
|
+
for finding in findings
|
|
1338
|
+
), findings
|
|
1339
|
+
|
|
1340
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1341
|
+
json.dumps({
|
|
1342
|
+
"phases": {
|
|
1343
|
+
"verify": {
|
|
1344
|
+
"verdict": "PASS",
|
|
1345
|
+
"sub_verdicts": {},
|
|
1346
|
+
"pair_trigger": {
|
|
1347
|
+
"eligible": True,
|
|
1348
|
+
"reasons": ["risk.high"],
|
|
1349
|
+
"skipped_reason": "user_no_pair",
|
|
1350
|
+
},
|
|
1351
|
+
}
|
|
1352
|
+
}
|
|
1353
|
+
}),
|
|
1354
|
+
encoding="utf-8",
|
|
1355
|
+
)
|
|
1356
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1357
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1358
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1359
|
+
assert any(
|
|
1360
|
+
finding.get("id") == "verify-pair-trigger-skip-contradiction"
|
|
1361
|
+
for finding in findings
|
|
1362
|
+
), findings
|
|
1363
|
+
|
|
1364
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1365
|
+
json.dumps({
|
|
1366
|
+
"phases": {
|
|
1367
|
+
"verify": {
|
|
1368
|
+
"verdict": "PASS",
|
|
1369
|
+
"sub_verdicts": {},
|
|
1370
|
+
"pair_trigger": {
|
|
1371
|
+
"eligible": False,
|
|
1372
|
+
"reasons": ["risk.high"],
|
|
1373
|
+
"skipped_reason": "user_no_pair",
|
|
1374
|
+
},
|
|
1375
|
+
}
|
|
1376
|
+
}
|
|
1377
|
+
}),
|
|
1378
|
+
encoding="utf-8",
|
|
1379
|
+
)
|
|
1380
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1381
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1382
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1383
|
+
assert any(
|
|
1384
|
+
finding.get("id") == "verify-pair-trigger-ineligible-reasons"
|
|
1385
|
+
for finding in findings
|
|
1386
|
+
), findings
|
|
1387
|
+
|
|
1388
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1389
|
+
json.dumps({
|
|
1390
|
+
"mode": "spec",
|
|
1391
|
+
"risk_profile": {
|
|
1392
|
+
"high_risk": True,
|
|
1393
|
+
"risk_probes_enabled": False,
|
|
1394
|
+
"pair_default_enabled": True,
|
|
1395
|
+
},
|
|
1396
|
+
"phases": {
|
|
1397
|
+
"verify": {
|
|
1398
|
+
"verdict": "PASS",
|
|
1399
|
+
"sub_verdicts": {},
|
|
1400
|
+
"pair_trigger": {
|
|
1401
|
+
"eligible": False,
|
|
1402
|
+
"reasons": [],
|
|
1403
|
+
"skipped_reason": None,
|
|
1404
|
+
},
|
|
1405
|
+
}
|
|
1406
|
+
},
|
|
1407
|
+
}),
|
|
1408
|
+
encoding="utf-8",
|
|
1409
|
+
)
|
|
1410
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1411
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1412
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1413
|
+
assert any(
|
|
1414
|
+
finding.get("id") == "verify-pair-trigger-ineligible-unjustified"
|
|
1415
|
+
and "risk.high" in str(finding.get("message"))
|
|
1416
|
+
for finding in findings
|
|
1417
|
+
), findings
|
|
1418
|
+
|
|
1419
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1420
|
+
json.dumps({
|
|
1421
|
+
"mode": "spec",
|
|
1422
|
+
"risk_profile": {
|
|
1423
|
+
"high_risk": True,
|
|
1424
|
+
"risk_probes_enabled": True,
|
|
1425
|
+
"pair_default_enabled": True,
|
|
1426
|
+
},
|
|
1427
|
+
"phases": {
|
|
1428
|
+
"verify": {
|
|
1429
|
+
"verdict": "PASS",
|
|
1430
|
+
"sub_verdicts": {},
|
|
1431
|
+
"pair_trigger": {
|
|
1432
|
+
"eligible": False,
|
|
1433
|
+
"reasons": [],
|
|
1434
|
+
"skipped_reason": "user_no_pair",
|
|
1435
|
+
},
|
|
1436
|
+
}
|
|
1437
|
+
},
|
|
1438
|
+
}),
|
|
1439
|
+
encoding="utf-8",
|
|
1440
|
+
)
|
|
1441
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1442
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1443
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1444
|
+
assert any(
|
|
1445
|
+
finding.get("id") == "verify-pair-trigger-user-no-pair-unsupported"
|
|
1446
|
+
and "pair_default_enabled false" in str(finding.get("message"))
|
|
1447
|
+
for finding in findings
|
|
1448
|
+
), findings
|
|
1449
|
+
|
|
1450
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1451
|
+
json.dumps({
|
|
1452
|
+
"pair_verify": True,
|
|
1453
|
+
"risk_profile": {
|
|
1454
|
+
"high_risk": True,
|
|
1455
|
+
"risk_probes_enabled": False,
|
|
1456
|
+
"pair_default_enabled": False,
|
|
1457
|
+
},
|
|
1458
|
+
"phases": {
|
|
1459
|
+
"verify": {
|
|
1460
|
+
"verdict": "PASS",
|
|
1461
|
+
"sub_verdicts": {},
|
|
1462
|
+
"pair_trigger": {
|
|
1463
|
+
"eligible": False,
|
|
1464
|
+
"reasons": [],
|
|
1465
|
+
"skipped_reason": "user_no_pair",
|
|
1466
|
+
},
|
|
1467
|
+
}
|
|
1468
|
+
},
|
|
1469
|
+
}),
|
|
1470
|
+
encoding="utf-8",
|
|
1471
|
+
)
|
|
1472
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1473
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1474
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1475
|
+
assert any(
|
|
1476
|
+
finding.get("id") == "verify-pair-trigger-conflicting-pair-flags"
|
|
1477
|
+
for finding in findings
|
|
1478
|
+
), findings
|
|
1479
|
+
|
|
1480
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1481
|
+
json.dumps({
|
|
1482
|
+
"mode": "spec",
|
|
1483
|
+
"risk_profile": {
|
|
1484
|
+
"high_risk": True,
|
|
1485
|
+
"risk_probes_enabled": True,
|
|
1486
|
+
"pair_default_enabled": False,
|
|
1487
|
+
},
|
|
1488
|
+
"phases": {
|
|
1489
|
+
"verify": {
|
|
1490
|
+
"verdict": "PASS",
|
|
1491
|
+
"sub_verdicts": {},
|
|
1492
|
+
"pair_trigger": {
|
|
1493
|
+
"eligible": False,
|
|
1494
|
+
"reasons": [],
|
|
1495
|
+
"skipped_reason": "user_no_pair",
|
|
1496
|
+
},
|
|
1497
|
+
}
|
|
1498
|
+
},
|
|
1499
|
+
}),
|
|
1500
|
+
encoding="utf-8",
|
|
1501
|
+
)
|
|
1502
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1503
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1504
|
+
assert summary["verdict"] == "PASS", summary
|
|
1505
|
+
assert not any(
|
|
1506
|
+
finding.get("id") == "verify-pair-trigger-required-missing"
|
|
1507
|
+
for finding in findings
|
|
1508
|
+
), findings
|
|
1509
|
+
|
|
1510
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1511
|
+
json.dumps({
|
|
1512
|
+
"phases": {
|
|
1513
|
+
"verify": {
|
|
1514
|
+
"verdict": "PASS",
|
|
1515
|
+
"sub_verdicts": {},
|
|
1516
|
+
"pair_trigger": {
|
|
1517
|
+
"eligible": False,
|
|
1518
|
+
"reasons": [],
|
|
1519
|
+
"skipped_reason": ["user_no_pair"],
|
|
1520
|
+
},
|
|
1521
|
+
}
|
|
1522
|
+
}
|
|
1523
|
+
}),
|
|
1524
|
+
encoding="utf-8",
|
|
1525
|
+
)
|
|
1526
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1527
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1528
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1529
|
+
assert any(
|
|
1530
|
+
finding.get("id") == "verify-pair-trigger-skipped-reason-malformed"
|
|
1531
|
+
for finding in findings
|
|
1532
|
+
), findings
|
|
1533
|
+
|
|
1534
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1535
|
+
json.dumps({
|
|
1536
|
+
"phases": {
|
|
1537
|
+
"verify": {
|
|
1538
|
+
"verdict": "PASS",
|
|
1539
|
+
"sub_verdicts": {},
|
|
1540
|
+
"pair_trigger": {
|
|
1541
|
+
"eligible": False,
|
|
1542
|
+
"reasons": [],
|
|
1543
|
+
"skipped_reason": "codex_unavailable",
|
|
1544
|
+
},
|
|
1545
|
+
}
|
|
1546
|
+
}
|
|
1547
|
+
}),
|
|
1548
|
+
encoding="utf-8",
|
|
1549
|
+
)
|
|
1550
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1551
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1552
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1553
|
+
assert any(
|
|
1554
|
+
finding.get("id") == "verify-pair-trigger-skipped-reason-unsupported"
|
|
1555
|
+
for finding in findings
|
|
1556
|
+
), findings
|
|
1557
|
+
|
|
1558
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1559
|
+
json.dumps({
|
|
1560
|
+
"phases": {
|
|
1561
|
+
"verify": {
|
|
1562
|
+
"verdict": "PASS",
|
|
1563
|
+
"sub_verdicts": {},
|
|
1564
|
+
"pair_trigger": {
|
|
1565
|
+
"eligible": False,
|
|
1566
|
+
"reasons": [],
|
|
1567
|
+
"skipped_reason": "mechanical_blocker",
|
|
1568
|
+
},
|
|
1569
|
+
}
|
|
1570
|
+
}
|
|
1571
|
+
}),
|
|
1572
|
+
encoding="utf-8",
|
|
1573
|
+
)
|
|
1574
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1575
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1576
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1577
|
+
assert any(
|
|
1578
|
+
finding.get("id") == "verify-pair-trigger-mechanical-blocker-unsupported"
|
|
1579
|
+
for finding in findings
|
|
1580
|
+
), findings
|
|
1581
|
+
|
|
1582
|
+
(devlyn / "verify-mechanical.findings.jsonl").write_text(
|
|
1583
|
+
json.dumps({"id": "m1", "severity": "HIGH"}) + "\n",
|
|
1584
|
+
encoding="utf-8",
|
|
1585
|
+
)
|
|
1586
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1587
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1588
|
+
assert summary["verdict"] == "NEEDS_WORK", summary
|
|
1589
|
+
assert not any(
|
|
1590
|
+
finding.get("id") == "verify-pair-trigger-mechanical-blocker-unsupported"
|
|
1591
|
+
for finding in findings
|
|
1592
|
+
), findings
|
|
1593
|
+
(devlyn / "verify-mechanical.findings.jsonl").write_text("", encoding="utf-8")
|
|
1594
|
+
|
|
1595
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1596
|
+
json.dumps({
|
|
1597
|
+
"phases": {
|
|
1598
|
+
"verify": {
|
|
1599
|
+
"verdict": "PASS",
|
|
1600
|
+
"sub_verdicts": {},
|
|
1601
|
+
"pair_trigger": {
|
|
1602
|
+
"eligible": False,
|
|
1603
|
+
"reasons": [],
|
|
1604
|
+
"skipped_reason": "primary_judge_blocker",
|
|
1605
|
+
},
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
}),
|
|
1609
|
+
encoding="utf-8",
|
|
1610
|
+
)
|
|
1611
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1612
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1613
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1614
|
+
assert any(
|
|
1615
|
+
finding.get("id") == "verify-pair-trigger-primary-judge-blocker-unsupported"
|
|
1616
|
+
for finding in findings
|
|
1617
|
+
), findings
|
|
1618
|
+
|
|
1619
|
+
(devlyn / "verify.findings.jsonl").write_text(
|
|
1620
|
+
json.dumps({"id": "j2", "severity": "HIGH"}) + "\n",
|
|
1621
|
+
encoding="utf-8",
|
|
1622
|
+
)
|
|
1623
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1624
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1625
|
+
assert summary["verdict"] == "NEEDS_WORK", summary
|
|
1626
|
+
assert not any(
|
|
1627
|
+
finding.get("id") == "verify-pair-trigger-primary-judge-blocker-unsupported"
|
|
1628
|
+
for finding in findings
|
|
1629
|
+
), findings
|
|
1630
|
+
(devlyn / "verify.findings.jsonl").write_text("", encoding="utf-8")
|
|
1631
|
+
|
|
1632
|
+
(devlyn / "pipeline.state.json").write_text(
|
|
1633
|
+
json.dumps({
|
|
1634
|
+
"phases": {"verify": {"verdict": "PASS", "sub_verdicts": {}}},
|
|
1635
|
+
"verify": {
|
|
1636
|
+
"pair_trigger": {
|
|
1637
|
+
"eligible": True,
|
|
1638
|
+
"reasons": ["looks-hard"],
|
|
1639
|
+
"skipped_reason": None,
|
|
1640
|
+
}
|
|
1641
|
+
},
|
|
1642
|
+
}),
|
|
1643
|
+
encoding="utf-8",
|
|
1644
|
+
)
|
|
1645
|
+
findings, source_verdicts = read_findings(devlyn)
|
|
1646
|
+
summary = write_outputs(devlyn, findings, source_verdicts)
|
|
1647
|
+
assert summary["verdict"] == "BLOCKED", summary
|
|
1648
|
+
assert any(
|
|
1649
|
+
finding.get("id") == "verify-pair-trigger-reasons-unknown"
|
|
1650
|
+
for finding in findings
|
|
1651
|
+
), findings
|
|
302
1652
|
return 0
|
|
303
1653
|
|
|
304
1654
|
|