devlyn-cli 1.15.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +104 -0
- package/CLAUDE.md +135 -21
- package/README.md +43 -125
- package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +272 -0
- package/benchmark/auto-resolve/README.md +114 -0
- package/benchmark/auto-resolve/RUBRIC.md +162 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +30 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/expected.json +68 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/spec.md +45 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/task.txt +8 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +54 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json +170 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json +84 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json +21 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-fail.json +214 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json +223 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh +5 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md +56 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt +14 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +28 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json +162 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +65 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json +19 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +56 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/task.txt +9 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +40 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/expected.json +57 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/setup.sh +6 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/task.txt +9 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +38 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/expected.json +65 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/setup.sh +55 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +38 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/expected.json +77 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/task.txt +10 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +50 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/expected.json +76 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/setup.sh +36 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md +46 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +50 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/expected.json +63 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +48 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/task.txt +1 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +93 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/expected.json +74 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/setup.sh +28 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +62 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/task.txt +5 -0
- package/benchmark/auto-resolve/fixtures/SCHEMA.md +130 -0
- package/benchmark/auto-resolve/fixtures/test-repo/README.md +27 -0
- package/benchmark/auto-resolve/fixtures/test-repo/bin/cli.js +63 -0
- package/benchmark/auto-resolve/fixtures/test-repo/package-lock.json +823 -0
- package/benchmark/auto-resolve/fixtures/test-repo/package.json +22 -0
- package/benchmark/auto-resolve/fixtures/test-repo/playwright.config.js +17 -0
- package/benchmark/auto-resolve/fixtures/test-repo/server/index.js +37 -0
- package/benchmark/auto-resolve/fixtures/test-repo/tests/cli.test.js +25 -0
- package/benchmark/auto-resolve/fixtures/test-repo/tests/server.test.js +58 -0
- package/benchmark/auto-resolve/fixtures/test-repo/web/index.html +37 -0
- package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +174 -0
- package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +256 -0
- package/benchmark/auto-resolve/scripts/compile-report.py +331 -0
- package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +552 -0
- package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +430 -0
- package/benchmark/auto-resolve/scripts/judge.sh +359 -0
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +260 -0
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +274 -0
- package/benchmark/auto-resolve/scripts/oracle-test-fidelity.py +328 -0
- package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +401 -0
- package/benchmark/auto-resolve/scripts/pair-plan-lint.py +468 -0
- package/benchmark/auto-resolve/scripts/run-fixture.sh +691 -0
- package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +234 -0
- package/benchmark/auto-resolve/scripts/run-suite.sh +214 -0
- package/benchmark/auto-resolve/scripts/ship-gate.py +222 -0
- package/bin/devlyn.js +175 -17
- package/config/skills/_shared/adapters/README.md +64 -0
- package/config/skills/_shared/adapters/gpt-5-5.md +29 -0
- package/config/skills/_shared/adapters/opus-4-7.md +29 -0
- package/config/skills/{devlyn:auto-resolve/scripts → _shared}/archive_run.py +26 -0
- package/config/skills/_shared/codex-config.md +54 -0
- package/config/skills/_shared/codex-monitored.sh +141 -0
- package/config/skills/_shared/engine-preflight.md +35 -0
- package/config/skills/_shared/expected.schema.json +93 -0
- package/config/skills/_shared/pair-plan-schema.md +298 -0
- package/config/skills/_shared/runtime-principles.md +110 -0
- package/config/skills/_shared/spec-verify-check.py +519 -0
- package/config/skills/devlyn:ideate/SKILL.md +99 -429
- package/config/skills/devlyn:ideate/references/elicitation.md +97 -0
- package/config/skills/devlyn:ideate/references/from-spec-mode.md +54 -0
- package/config/skills/devlyn:ideate/references/project-mode.md +76 -0
- package/config/skills/devlyn:ideate/references/spec-template.md +102 -0
- package/config/skills/devlyn:resolve/SKILL.md +172 -184
- package/config/skills/devlyn:resolve/references/free-form-mode.md +68 -0
- package/config/skills/devlyn:resolve/references/phases/build-gate.md +45 -0
- package/config/skills/devlyn:resolve/references/phases/cleanup.md +39 -0
- package/config/skills/devlyn:resolve/references/phases/implement.md +42 -0
- package/config/skills/devlyn:resolve/references/phases/plan.md +42 -0
- package/config/skills/devlyn:resolve/references/phases/verify.md +69 -0
- package/config/skills/devlyn:resolve/references/state-schema.md +106 -0
- package/{config/skills → optional-skills}/devlyn:design-system/SKILL.md +1 -0
- package/{config/skills → optional-skills}/devlyn:reap/SKILL.md +1 -0
- package/{config/skills → optional-skills}/devlyn:team-design-ui/SKILL.md +5 -0
- package/package.json +12 -2
- package/scripts/lint-skills.sh +431 -0
- package/config/skills/devlyn:auto-resolve/SKILL.md +0 -252
- package/config/skills/devlyn:auto-resolve/evals/evals.json +0 -21
- package/config/skills/devlyn:auto-resolve/evals/task-doctor-subcommand.md +0 -42
- package/config/skills/devlyn:auto-resolve/references/build-gate.md +0 -130
- package/config/skills/devlyn:auto-resolve/references/engine-routing.md +0 -82
- package/config/skills/devlyn:auto-resolve/references/findings-schema.md +0 -103
- package/config/skills/devlyn:auto-resolve/references/phases/phase-1-build.md +0 -54
- package/config/skills/devlyn:auto-resolve/references/phases/phase-2-evaluate.md +0 -45
- package/config/skills/devlyn:auto-resolve/references/phases/phase-3-critic.md +0 -84
- package/config/skills/devlyn:auto-resolve/references/pipeline-routing.md +0 -114
- package/config/skills/devlyn:auto-resolve/references/pipeline-state.md +0 -201
- package/config/skills/devlyn:auto-resolve/scripts/terminal_verdict.py +0 -96
- package/config/skills/devlyn:browser-validate/SKILL.md +0 -164
- package/config/skills/devlyn:browser-validate/references/flow-testing.md +0 -118
- package/config/skills/devlyn:browser-validate/references/tier1-chrome.md +0 -137
- package/config/skills/devlyn:browser-validate/references/tier2-playwright.md +0 -195
- package/config/skills/devlyn:browser-validate/references/tier3-curl.md +0 -57
- package/config/skills/devlyn:clean/SKILL.md +0 -285
- package/config/skills/devlyn:design-ui/SKILL.md +0 -351
- package/config/skills/devlyn:discover-product/SKILL.md +0 -124
- package/config/skills/devlyn:evaluate/SKILL.md +0 -564
- package/config/skills/devlyn:feature-spec/SKILL.md +0 -630
- package/config/skills/devlyn:ideate/references/challenge-rubric.md +0 -122
- package/config/skills/devlyn:ideate/references/codex-critic-template.md +0 -42
- package/config/skills/devlyn:ideate/references/templates/item-spec.md +0 -90
- package/config/skills/devlyn:implement-ui/SKILL.md +0 -466
- package/config/skills/devlyn:preflight/SKILL.md +0 -355
- package/config/skills/devlyn:preflight/references/auditors/browser-auditor.md +0 -32
- package/config/skills/devlyn:preflight/references/auditors/code-auditor.md +0 -86
- package/config/skills/devlyn:preflight/references/auditors/docs-auditor.md +0 -38
- package/config/skills/devlyn:product-spec/SKILL.md +0 -603
- package/config/skills/devlyn:recommend-features/SKILL.md +0 -286
- package/config/skills/devlyn:review/SKILL.md +0 -161
- package/config/skills/devlyn:team-resolve/SKILL.md +0 -631
- package/config/skills/devlyn:team-review/SKILL.md +0 -493
- package/config/skills/devlyn:update-docs/SKILL.md +0 -463
- package/config/skills/workflow-routing/SKILL.md +0 -73
- /package/{config/skills → optional-skills}/devlyn:reap/scripts/reap.sh +0 -0
- /package/{config/skills → optional-skills}/devlyn:reap/scripts/scan.sh +0 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
pair-plan-lint.py — validate a `pair-plan.json` against its registry and
|
|
4
|
+
the `pair-plan-schema.md` hard rules.
|
|
5
|
+
|
|
6
|
+
Inputs:
|
|
7
|
+
--plan <path> path to pair-plan.json (required)
|
|
8
|
+
--registry <path> override registry path (default: resolved from
|
|
9
|
+
plan.source.canonical_id_registry_path)
|
|
10
|
+
--quiet suppress stderr human summary
|
|
11
|
+
|
|
12
|
+
Output:
|
|
13
|
+
stdout — `{"ok": bool, "errors": [{code, message, ...}, ...]}` (machine-readable)
|
|
14
|
+
stderr — short human summary (omitted when --quiet is given)
|
|
15
|
+
|
|
16
|
+
Exit code:
|
|
17
|
+
0 on pass, 1 on fail.
|
|
18
|
+
|
|
19
|
+
Schema source: config/skills/_shared/pair-plan-schema.md (iter-0022 ship).
|
|
20
|
+
"""
|
|
21
|
+
import argparse
|
|
22
|
+
import copy
|
|
23
|
+
import hashlib
|
|
24
|
+
import json
|
|
25
|
+
import pathlib
|
|
26
|
+
import sys
|
|
27
|
+
|
|
28
|
+
# Schema revision this linter accepts; check_top_level_shape compares the
# plan's `schema_version` against it.
SCHEMA_VERSION = "1"

# Exact list (order included) that a plan's `authority_order` must equal.
AUTHORITY_ORDER_CANONICAL = ["spec.md", "expected.json/rubric", "phase prompt", "model preference"]

# Keys that must exist at the root of pair-plan.json.
REQUIRED_TOP_LEVEL = [
    "schema_version", "plan_status", "planning_mode",
    "source", "authority_order", "rounds",
    "accepted_invariants", "rejected_alternatives",
    "unresolved", "escalated_to_user",
    "model_stamps",
]

# Path / sha256 key names of plan["source"].
# NOTE(review): no function visible in this file reads this list — confirm
# whether it is consumed elsewhere before relying on it.
REQUIRED_SOURCE_FIELDS = [
    "spec_path", "spec_sha256",
    "rubric_path", "rubric_sha256",
    "canonical_id_registry_path", "canonical_id_registry_sha256",
]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# JSON loading with strict-keys.
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
def _strict_pairs(pairs):
    """Turn decoded JSON ``(key, value)`` pairs into a dict, refusing repeats.

    Used as the ``object_pairs_hook`` of the JSON loader so that a key
    appearing twice in one object is an error instead of a silent overwrite.

    Raises:
        ValueError: if any key occurs more than once in *pairs*.
    """
    already_seen = set()
    for name, _value in pairs:
        if name in already_seen:
            raise ValueError("duplicate key in pair-plan.json")
        already_seen.add(name)
    return dict(pairs)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def load_strict_json(path):
    """Read the UTF-8 file at *path* and parse it as JSON with strict keys.

    Every decoded object is routed through ``_strict_pairs``, so a duplicate
    key raises ``ValueError``; malformed JSON raises ``json.JSONDecodeError``.
    """
    with open(path, "r", encoding="utf-8") as handle:
        text = handle.read()
    return json.loads(text, object_pairs_hook=_strict_pairs)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Hash helpers.
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
def file_sha256(path):
    """Return the hex SHA-256 digest of the raw bytes of the file at *path*."""
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        digest.update(stream.read())
    return digest.hexdigest()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def canonical_pre_stamp_sha256(plan):
    """Return the hex SHA-256 of *plan* serialised with empty ``model_stamps``.

    The serialisation is compact JSON with sorted keys, non-ASCII characters
    kept as-is, NaN/Infinity rejected, encoded as UTF-8. *plan* itself is not
    modified.
    """
    # A shallow copy is enough: only the top-level "model_stamps" slot is
    # replaced and json.dumps never mutates what it serialises.
    unstamped = {**plan, "model_stamps": {}}
    canonical_text = json.dumps(
        unstamped,
        allow_nan=False,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
    )
    payload = canonical_text.encode("utf-8")
    return hashlib.sha256(payload).hexdigest()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
# Individual checks (each returns a list of error dicts).
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# Values check_top_level_shape accepts for `plan_status`.
PLAN_STATUS_VALID = {"draft", "blocked", "final"}

# Values check_top_level_shape accepts for `planning_mode`.
PLANNING_MODE_VALID = {"pair", "solo"}

# Keys every accepted_invariants[] entry must carry with a non-empty value.
ACCEPTED_INVARIANT_REQUIRED_FIELDS = (
    "id",
    "source_refs",
    "operational_check",
    "authority",
)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def check_top_level_shape(plan):
    """Validate the root of pair-plan.json before any deeper check runs.

    Reports, as a list of error dicts (empty when the shape is fine):
      * a root that is not a JSON object (returned alone),
      * every key of REQUIRED_TOP_LEVEL that is absent,
      * a `schema_version` different from SCHEMA_VERSION,
      * a `plan_status` / `planning_mode` outside the allowed sets,
      * a container field holding a non-empty value of the wrong JSON type.
    """
    if not isinstance(plan, dict):
        return [{"code": "plan_not_object", "message": "pair-plan.json root must be a JSON object"}]

    errs = [
        {"code": "missing_top_level_field", "field": f,
         "message": f"required top-level field missing: {f}"}
        for f in REQUIRED_TOP_LEVEL
        if f not in plan
    ]

    if plan.get("schema_version") != SCHEMA_VERSION:
        errs.append({"code": "schema_version_mismatch",
                     "expected": SCHEMA_VERSION,
                     "got": plan.get("schema_version"),
                     "message": f"schema_version must be {SCHEMA_VERSION!r}"})

    # The isinstance guard matters: a JSON array/object here is unhashable and
    # would make the set-membership test raise TypeError instead of reporting.
    status = plan.get("plan_status")
    if not isinstance(status, str) or status not in PLAN_STATUS_VALID:
        errs.append({"code": "plan_status_invalid",
                     "got": status,
                     "message": f"plan_status must be one of {sorted(PLAN_STATUS_VALID)}"})

    mode = plan.get("planning_mode")
    if not isinstance(mode, str) or mode not in PLANNING_MODE_VALID:
        errs.append({"code": "planning_mode_invalid",
                     "got": mode,
                     "message": f"planning_mode must be one of {sorted(PLANNING_MODE_VALID)}"})

    # The later checks call .get() / len() / iterate on these fields. Falsy
    # values are treated as "empty" by those checks (`plan.get(x) or []`), so
    # only a non-empty value of the wrong type is rejected here.
    container_types = (
        ("source", dict),
        ("rounds", list),
        ("accepted_invariants", list),
        ("rejected_alternatives", list),
        ("unresolved", list),
        ("escalated_to_user", list),
        ("model_stamps", dict),
    )
    for field, kind in container_types:
        value = plan.get(field)
        if value and not isinstance(value, kind):
            wanted = "object" if kind is dict else "array"
            errs.append({"code": "top_level_field_wrong_type",
                         "field": field,
                         "message": f"{field} must be a JSON {wanted}"})
    return errs
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def check_accepted_invariants_shape(plan):
    """Check that every accepted_invariants[] entry is a fully populated object.

    An entry must be a JSON object, and each key listed in
    ACCEPTED_INVARIANT_REQUIRED_FIELDS must be present with a value other
    than None, "" or []. Returns one error dict per violation.
    """
    problems = []
    invariants = plan.get("accepted_invariants") or []
    for index, entry in enumerate(invariants):
        if isinstance(entry, dict):
            for field in ACCEPTED_INVARIANT_REQUIRED_FIELDS:
                if field in entry and entry[field] not in (None, "", []):
                    continue
                problems.append({"code": "accepted_invariant_missing_field",
                                 "index": index,
                                 "id": entry.get("id"),
                                 "field": field,
                                 "message": f"accepted_invariants[{index}].{field} missing or empty"})
        else:
            problems.append({"code": "accepted_invariant_not_object",
                             "index": index,
                             "message": f"accepted_invariants[{index}] is not a JSON object"})
    return problems
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def check_registry_shape(registry):
    """Validate the structure of the canonical id registry.

    Rejects unknown top-level fields and a `required_invariants` list that is
    not sorted lexicographically by id. Malformed input (non-object root,
    non-list `required_invariants`, entries without a string id) is reported
    as lint errors rather than raising.

    Returns a list of error dicts, empty when the registry is well-formed.
    """
    if not isinstance(registry, dict):
        return [{"code": "registry_not_object",
                 "message": "canonical_id_registry.json root must be a JSON object"}]

    errs = []
    allowed = {"schema_version", "fixture_id", "generated_at", "generated_from", "required_invariants"}
    extra = sorted(set(registry) - allowed)
    if extra:
        errs.append({"code": "registry_unknown_field",
                     "fields": extra,
                     "message": f"canonical_id_registry.json has unknown top-level fields: {extra}"})

    inv = registry.get("required_invariants") or []
    if not isinstance(inv, list):
        errs.append({"code": "registry_invariants_not_list",
                     "message": "required_invariants must be a JSON array"})
        return errs

    # Only string ids take part in the ordering check: comparing None (missing
    # id) with a str inside sorted() would raise TypeError.
    ids = []
    for i, entry in enumerate(inv):
        rid = entry.get("id") if isinstance(entry, dict) else None
        if isinstance(rid, str):
            ids.append(rid)
        else:
            errs.append({"code": "registry_entry_invalid_id",
                         "index": i,
                         "message": f"required_invariants[{i}] must be an object with a string id"})
    if ids != sorted(ids):
        errs.append({"code": "registry_unsorted",
                     "message": "required_invariants[] must be sorted lexicographically by id"})
    return errs
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def check_authority_order(plan):
    """Require plan["authority_order"] to equal AUTHORITY_ORDER_CANONICAL exactly.

    Returns a single `authority_order_drift` error when it differs, else [].
    """
    declared = plan.get("authority_order")
    if declared == AUTHORITY_ORDER_CANONICAL:
        return []
    drift = {
        "code": "authority_order_drift",
        "expected": AUTHORITY_ORDER_CANONICAL,
        "got": declared,
        "message": "authority_order must be the canonical 4-string snapshot",
    }
    return [drift]
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def check_unresolved_status(plan):
    """Reject a plan that still lists unresolved items unless it is blocked/draft.

    Returns one `unresolved_with_final_status` error, or [] when `unresolved`
    is empty or `plan_status` is "blocked" or "draft".
    """
    pending = plan.get("unresolved") or []
    plan_status = plan.get("plan_status")
    if len(pending) == 0 or plan_status in ("blocked", "draft"):
        return []
    return [{
        "code": "unresolved_with_final_status",
        "unresolved_count": len(pending),
        "plan_status": plan_status,
        "message": "unresolved is non-empty but plan_status is not 'blocked' or 'draft'",
    }]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def check_escalated_status(plan):
    """Require a `user_resolution` on every escalated item of a non-blocked/draft plan.

    Plans whose `plan_status` is "blocked" or "draft" are exempt. For any
    other status each escalated_to_user[] entry must be a JSON object that
    contains the key `user_resolution`; entries that are not objects are
    reported instead of crashing the linter.

    Returns a list of error dicts (empty when everything is resolved).
    """
    escalated = plan.get("escalated_to_user") or []
    if not escalated or plan.get("plan_status") in ("blocked", "draft"):
        return []

    errs = []
    for i, item in enumerate(escalated):
        if not isinstance(item, dict):
            # Previously `item.get(...)` raised AttributeError on such entries.
            errs.append({"code": "escalated_not_object",
                         "index": i,
                         "message": f"escalated_to_user[{i}] is not a JSON object"})
            continue
        if "user_resolution" not in item:
            errs.append({"code": "escalated_without_resolution",
                         "index": i,
                         "id": item.get("id"),
                         "message": "escalated_to_user[] item missing user_resolution while plan_status is final"})
    return errs
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def check_planning_mode_rounds(plan):
    """Check that the number of rounds fits the planning mode.

    "pair" needs at least one round, "solo" needs none; any other mode is not
    judged here. Returns a one-element error list or [].
    """
    rounds = plan.get("rounds") or []
    planning_mode = plan.get("planning_mode")

    if planning_mode == "pair":
        if len(rounds) >= 1:
            return []
        return [{"code": "pair_mode_no_rounds",
                 "message": "planning_mode=pair requires rounds.length >= 1"}]

    if planning_mode == "solo":
        if len(rounds) == 0:
            return []
        return [{"code": "solo_mode_with_rounds",
                 "message": "planning_mode=solo requires rounds.length == 0"}]

    return []
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def check_accepted_ids_in_registry(plan, registry):
    """Report accepted_invariants[] ids that the registry does not declare.

    Tolerates malformed input instead of raising:
      * registry entries that are not objects or lack an `id` are ignored,
      * accepted entries that are not objects are skipped here
        (check_accepted_invariants_shape reports them),
      * an id that is a JSON array/object (unhashable) is never "in" the
        registry and is reported like any other unknown id.

    Returns a list of `accepted_id_not_in_registry` error dicts.
    """
    registry_ids = set()
    for entry in registry.get("required_invariants") or []:
        if isinstance(entry, dict) and "id" in entry and not isinstance(entry["id"], (list, dict)):
            registry_ids.add(entry["id"])

    errs = []
    for i, item in enumerate(plan.get("accepted_invariants") or []):
        if not isinstance(item, dict):
            continue
        rid = item.get("id")
        # Test the type first: `list in set` would raise TypeError.
        if isinstance(rid, (list, dict)) or rid not in registry_ids:
            errs.append({"code": "accepted_id_not_in_registry",
                         "index": i,
                         "id": rid,
                         "message": f"accepted_invariants[{i}].id={rid!r} not present in registry"})
    return errs
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def check_registry_coverage(plan, registry):
    """Every registry required_invariants[].id must be accounted for in a final plan.

    Coverage is enforced only when plan_status == "final" — draft / blocked
    plans are allowed to have un-decided ids in `unresolved[]` (Hard Rule #1
    in pair-plan-schema.md) without tripping coverage. `unresolved[]` items
    of shape `{id, note}` count as accounted-for at the structural level.

    An id is accounted for when it appears as the `id` of an entry in
    `accepted_invariants`, `escalated_to_user` or `unresolved`, or inside a
    `rejected_alternatives[].conflicts_with_ids` list. Entries that are not
    JSON objects, and ids that are JSON arrays/objects, are ignored rather
    than raising; a registry entry without an `id` is skipped.

    Returns a list of `missing_required_id` error dicts.
    """
    if plan.get("plan_status") != "final":
        return []

    def _scalar(value):
        # JSON arrays/objects are unhashable and cannot be set members.
        return not isinstance(value, (list, dict))

    def _entry_ids(entries):
        # Ids of the object-shaped entries only.
        return {e.get("id") for e in (entries or [])
                if isinstance(e, dict) and _scalar(e.get("id"))}

    accounted = (
        _entry_ids(plan.get("accepted_invariants"))
        | _entry_ids(plan.get("escalated_to_user"))
        | _entry_ids(plan.get("unresolved"))
    )
    for item in plan.get("rejected_alternatives") or []:
        if not isinstance(item, dict):
            continue
        conflicts = item.get("conflicts_with_ids")
        if isinstance(conflicts, list):
            accounted.update(cid for cid in conflicts if _scalar(cid))

    errs = []
    for entry in registry.get("required_invariants") or []:
        if not isinstance(entry, dict) or "id" not in entry:
            continue
        rid = entry["id"]
        if not _scalar(rid) or rid not in accounted:
            errs.append({"code": "missing_required_id",
                         "id": rid,
                         "message": f"registry id {rid!r} is not in accepted_invariants, rejected_alternatives.conflicts_with_ids, escalated_to_user, or unresolved"})
    return errs
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def check_model_stamps(plan):
    """Validate the `claude` and `codex` entries of plan["model_stamps"].

    For each of the two signers the stamp must be a JSON object whose
    `status` is "sign" or "block" and which carries `signed_plan_sha256`.
    When plan_status is "final", both statuses must additionally be "sign".
    A stamp (or the whole `model_stamps` value) that is not an object is
    reported as missing instead of crashing the linter.

    Returns a list of error dicts.
    """
    stamps = plan.get("model_stamps")
    if not isinstance(stamps, dict):
        stamps = {}

    signers = ("claude", "codex")
    errs = []
    for who in signers:
        s = stamps.get(who)
        if not isinstance(s, dict):
            errs.append({"code": "stamp_missing", "who": who,
                         "message": f"model_stamps.{who} missing or not an object"})
            continue
        if s.get("status") not in ("sign", "block"):
            errs.append({"code": "stamp_status_invalid", "who": who,
                         "got": s.get("status"),
                         "message": f"model_stamps.{who}.status must be 'sign' or 'block'"})
        if "signed_plan_sha256" not in s:
            errs.append({"code": "stamp_sha_missing", "who": who,
                         "message": f"model_stamps.{who}.signed_plan_sha256 missing"})

    if plan.get("plan_status") == "final":
        for who in signers:
            s = stamps.get(who)
            # A non-object stamp has no status; `.get` on it used to raise.
            status = s.get("status") if isinstance(s, dict) else None
            if status != "sign":
                errs.append({"code": "final_with_non_sign_stamp",
                             "who": who,
                             "status": status,
                             "message": f"plan_status=final requires model_stamps.{who}.status=sign"})
    return errs
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def check_signed_pre_stamp_sha(plan):
    """Check that both signers stamped the same, canonical plan hash.

    The reference value is canonical_pre_stamp_sha256(plan). Two things are
    reported:
      * `stamp_sha_disagree` when the claude and codex `signed_plan_sha256`
        values differ from each other,
      * `stamp_sha_mismatch_canonical` for each signer whose value is present
        (not None) but differs from the reference hash.
    A stamp that is not a JSON object is treated as having no sha.

    Returns a list of error dicts.
    """
    expected = canonical_pre_stamp_sha256(plan)

    stamps = plan.get("model_stamps")
    if not isinstance(stamps, dict):
        stamps = {}

    def _signed_sha(who):
        # `.get` on a non-object stamp (e.g. a string) used to raise.
        stamp = stamps.get(who)
        return stamp.get("signed_plan_sha256") if isinstance(stamp, dict) else None

    claude_sha = _signed_sha("claude")
    codex_sha = _signed_sha("codex")

    errs = []
    if claude_sha != codex_sha:
        errs.append({"code": "stamp_sha_disagree",
                     "claude_sha": claude_sha,
                     "codex_sha": codex_sha,
                     "message": "model_stamps.{claude,codex}.signed_plan_sha256 must be byte-identical"})
    for who, sha in (("claude", claude_sha), ("codex", codex_sha)):
        if sha is not None and sha != expected:
            errs.append({"code": "stamp_sha_mismatch_canonical",
                         "who": who,
                         "got": sha,
                         "expected": expected,
                         "message": f"model_stamps.{who}.signed_plan_sha256 does not equal canonical pre-stamp sha"})
    return errs
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def check_source_file_shas(plan, plan_path):
    """Each source.<x>_sha256 must equal the raw-bytes sha256 of the referenced file.

    Args:
        plan: parsed pair-plan object.
        plan_path: location of the plan file; its directory is one of the
            anchors used to resolve relative source paths.

    For spec, rubric and registry both the path and the sha field are
    required; `expected_path` is skipped when absent. Errors reported:
    `source_path_missing`, `source_sha_missing`, `source_path_unreadable`
    (path does not resolve, is not a string, or cannot be read) and
    `source_sha_drift`.

    Returns a list of error dicts.
    """
    src = plan.get("source") or {}
    plan_dir = pathlib.Path(plan_path).resolve().parent
    errs = []
    pairs = [
        ("spec_path", "spec_sha256"),
        ("expected_path", "expected_sha256"),
        ("rubric_path", "rubric_sha256"),
        ("canonical_id_registry_path", "canonical_id_registry_sha256"),
    ]
    for path_field, sha_field in pairs:
        if path_field not in src:
            # expected_path is optional only when expected.json is genuinely
            # absent; rubric/registry/spec are always required.
            if path_field != "expected_path":
                errs.append({"code": "source_path_missing",
                             "field": path_field,
                             "message": f"source.{path_field} missing"})
            continue
        if sha_field not in src:
            errs.append({"code": "source_sha_missing",
                         "field": sha_field,
                         "message": f"source.{sha_field} missing"})
            continue

        path_str = src[path_field]
        # A non-string value cannot be a path; treat it as unresolvable
        # rather than letting pathlib raise TypeError.
        path_abs = _resolve_repo_path(path_str, plan_dir) if isinstance(path_str, str) else None
        if path_abs is None:
            errs.append({"code": "source_path_unreadable",
                         "field": path_field,
                         # Report the configured value: path_abs is None here,
                         # so stringifying it would just yield "None".
                         "path": str(path_str),
                         "message": f"source.{path_field} resolves to a path that does not exist"})
            continue

        try:
            actual = file_sha256(path_abs)
        except OSError as exc:
            # e.g. the path is a directory or is not readable.
            errs.append({"code": "source_path_unreadable",
                         "field": path_field,
                         "path": str(path_abs),
                         "message": f"source.{path_field} could not be read: {exc}"})
            continue

        if actual != src[sha_field]:
            errs.append({"code": "source_sha_drift",
                         "field": sha_field,
                         "expected": src[sha_field],
                         "actual": actual,
                         "path": str(path_abs),
                         "message": f"source.{sha_field} does not match raw-bytes sha of {path_str}"})
    return errs
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# ---------------------------------------------------------------------------
|
|
346
|
+
# Path resolution — repo-relative paths in plan/source live in the repo,
|
|
347
|
+
# but the plan itself can be anywhere (e.g. /tmp during tests). Try four
# anchors in order: absolute path → git-root ancestor of plan_dir → cwd → plan_dir.
|
|
349
|
+
# Returns the first existing absolute path, or None if no anchor yields one.
|
|
350
|
+
# ---------------------------------------------------------------------------
|
|
351
|
+
def _git_root_ancestor(start):
    """Return the nearest directory at or above *start* that contains `.git`.

    *start* is resolved to an absolute path first. The filesystem root itself
    is never tested. Returns None when no such directory is found.
    """
    origin = pathlib.Path(start).resolve()
    # origin followed by its ancestors, nearest first; the last element is the
    # filesystem root, which is dropped.
    below_root = [origin, *origin.parents][:-1]
    for directory in below_root:
        if (directory / ".git").exists():
            return directory
    return None
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _resolve_repo_path(path_str, plan_dir):
    """Locate *path_str* on disk and return it as a path, or None.

    An absolute path is returned unchanged when it exists. A relative path is
    joined, in this order, to the git-root ancestor of *plan_dir* (when there
    is one), the current working directory, and *plan_dir* itself; the first
    resolved candidate that exists is returned.
    """
    target = pathlib.Path(path_str)
    if target.is_absolute():
        if target.exists():
            return target
        return None

    anchors = []
    git_root = _git_root_ancestor(plan_dir)
    if git_root is not None:
        anchors.append(git_root)
    anchors.extend([pathlib.Path.cwd(), plan_dir])

    tried = set()
    for anchor in anchors:
        candidate = (anchor / target).resolve()
        if candidate in tried:
            # Same location reached through another anchor; already checked.
            continue
        tried.add(candidate)
        if candidate.exists():
            return candidate
    return None
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def resolve_registry_from_plan(plan, plan_path):
    """Resolve the registry file named by plan["source"]["canonical_id_registry_path"].

    Returns None when the plan does not name a registry; otherwise whatever
    `_resolve_repo_path` yields for that value, anchored at the directory
    containing *plan_path*.
    """
    registry_ref = (plan.get("source") or {}).get("canonical_id_registry_path")
    if registry_ref is None:
        return None
    anchor_dir = pathlib.Path(plan_path).resolve().parent
    return _resolve_repo_path(registry_ref, anchor_dir)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# ---------------------------------------------------------------------------
|
|
390
|
+
# Top-level lint.
|
|
391
|
+
# ---------------------------------------------------------------------------
|
|
392
|
+
def lint(plan_path, registry_override=None):
    """Lint the pair-plan at *plan_path* and return a verdict dict.

    Args:
        plan_path: path to pair-plan.json.
        registry_override: optional registry path; when falsy the registry is
            taken from plan["source"]["canonical_id_registry_path"].

    Returns:
        ``{"ok": bool, "errors": [...]}``. When the plan and the registry both
        load, the dict also carries ``"registry_path"``. Loading problems and
        a broken top-level shape short-circuit with a single group of errors;
        otherwise the errors of every individual check are concatenated.
    """
    def _fail(code, message, **extra):
        # Verdict for a failure that prevents any further checking.
        return {"ok": False, "errors": [{"code": code, **extra, "message": message}]}

    try:
        plan = load_strict_json(plan_path)
    except json.JSONDecodeError as e:
        return _fail("plan_invalid_json", f"plan parse error: {e}")
    except UnicodeDecodeError as e:
        # UnicodeDecodeError is a ValueError; without this clause a plan that
        # is not valid UTF-8 would be mislabelled as having duplicate keys.
        return _fail("plan_invalid_json", f"plan parse error: {e}")
    except ValueError as e:
        return _fail("plan_duplicate_keys", str(e))
    except FileNotFoundError:
        return _fail("plan_not_found", f"plan file not found: {plan_path}")
    except OSError as e:
        # e.g. the path is a directory or is not readable.
        return _fail("plan_unreadable", f"plan file could not be read: {e}")

    # top-level shape first; skip downstream checks if shape is broken
    shape_errs = check_top_level_shape(plan)
    if shape_errs:
        return {"ok": False, "errors": shape_errs}

    # resolve registry
    if registry_override:
        registry_path = pathlib.Path(registry_override).resolve()
    else:
        registry_path = resolve_registry_from_plan(plan, plan_path)
    if registry_path is None or not registry_path.exists():
        return _fail(
            "registry_unreachable",
            "could not resolve canonical_id_registry; provide --registry or fix source.canonical_id_registry_path",
            registry_path=str(registry_path) if registry_path else None,
        )
    try:
        registry = load_strict_json(registry_path)
    except (json.JSONDecodeError, ValueError) as e:
        return _fail("registry_invalid", str(e), registry_path=str(registry_path))
    except OSError as e:
        # The path exists but cannot be opened as a file.
        return _fail("registry_unreachable", f"registry could not be read: {e}",
                     registry_path=str(registry_path))
    if not isinstance(registry, dict):
        # The registry checks below call .get()/.keys() on the root.
        return _fail("registry_invalid", "canonical_id_registry root must be a JSON object",
                     registry_path=str(registry_path))

    errors = []
    errors += check_registry_shape(registry)
    errors += check_authority_order(plan)
    errors += check_accepted_invariants_shape(plan)
    errors += check_unresolved_status(plan)
    errors += check_escalated_status(plan)
    errors += check_planning_mode_rounds(plan)
    errors += check_accepted_ids_in_registry(plan, registry)
    errors += check_registry_coverage(plan, registry)
    errors += check_model_stamps(plan)
    errors += check_signed_pre_stamp_sha(plan)
    errors += check_source_file_shas(plan, plan_path)

    return {"ok": not errors, "errors": errors,
            "registry_path": str(registry_path)}
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def main():
    """Command-line entry point.

    Parses --plan / --registry / --quiet, runs `lint`, prints the verdict as
    indented JSON on stdout, writes a short summary to stderr unless --quiet
    was given, and exits with status 0 on pass or 1 on fail.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--plan", required=True)
    parser.add_argument("--registry", default=None,
                        help="Override registry path (default: source.canonical_id_registry_path)")
    parser.add_argument("--quiet", action="store_true")
    opts = parser.parse_args()

    verdict = lint(opts.plan, registry_override=opts.registry)
    passed = verdict["ok"]

    print(json.dumps(verdict, indent=2, sort_keys=True))
    if not opts.quiet:
        if passed:
            print(f"pair-plan-lint: PASS — {opts.plan} (registry: {verdict.get('registry_path')})", file=sys.stderr)
        else:
            failures = verdict["errors"]
            print(f"pair-plan-lint: FAIL — {opts.plan} ({len(failures)} error(s))", file=sys.stderr)
            for failure in failures:
                print(f"  [{failure.get('code')}] {failure.get('message')}", file=sys.stderr)

    sys.exit(0 if passed else 1)


if __name__ == "__main__":
    main()
|