devlyn-cli 1.15.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/AGENTS.md +104 -0
  2. package/CLAUDE.md +135 -21
  3. package/README.md +43 -125
  4. package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +272 -0
  5. package/benchmark/auto-resolve/README.md +114 -0
  6. package/benchmark/auto-resolve/RUBRIC.md +162 -0
  7. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +30 -0
  8. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/expected.json +68 -0
  9. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/metadata.json +10 -0
  10. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/setup.sh +4 -0
  11. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/spec.md +45 -0
  12. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/task.txt +8 -0
  13. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +54 -0
  14. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json +170 -0
  15. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json +84 -0
  16. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json +21 -0
  17. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-fail.json +214 -0
  18. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json +223 -0
  19. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh +5 -0
  20. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md +56 -0
  21. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt +14 -0
  22. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +28 -0
  23. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json +162 -0
  24. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +65 -0
  25. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json +19 -0
  26. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh +4 -0
  27. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +56 -0
  28. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/task.txt +9 -0
  29. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +40 -0
  30. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/expected.json +57 -0
  31. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/metadata.json +10 -0
  32. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/setup.sh +6 -0
  33. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md +49 -0
  34. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/task.txt +9 -0
  35. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +38 -0
  36. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/expected.json +65 -0
  37. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/metadata.json +10 -0
  38. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/setup.sh +55 -0
  39. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md +49 -0
  40. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/task.txt +7 -0
  41. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +38 -0
  42. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/expected.json +77 -0
  43. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/metadata.json +10 -0
  44. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/setup.sh +4 -0
  45. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md +49 -0
  46. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/task.txt +10 -0
  47. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +50 -0
  48. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/expected.json +76 -0
  49. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/metadata.json +10 -0
  50. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/setup.sh +36 -0
  51. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md +46 -0
  52. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/task.txt +7 -0
  53. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +50 -0
  54. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/expected.json +63 -0
  55. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/metadata.json +10 -0
  56. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/setup.sh +4 -0
  57. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +48 -0
  58. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/task.txt +1 -0
  59. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +93 -0
  60. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/expected.json +74 -0
  61. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/metadata.json +10 -0
  62. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/setup.sh +28 -0
  63. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +62 -0
  64. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/task.txt +5 -0
  65. package/benchmark/auto-resolve/fixtures/SCHEMA.md +130 -0
  66. package/benchmark/auto-resolve/fixtures/test-repo/README.md +27 -0
  67. package/benchmark/auto-resolve/fixtures/test-repo/bin/cli.js +63 -0
  68. package/benchmark/auto-resolve/fixtures/test-repo/package-lock.json +823 -0
  69. package/benchmark/auto-resolve/fixtures/test-repo/package.json +22 -0
  70. package/benchmark/auto-resolve/fixtures/test-repo/playwright.config.js +17 -0
  71. package/benchmark/auto-resolve/fixtures/test-repo/server/index.js +37 -0
  72. package/benchmark/auto-resolve/fixtures/test-repo/tests/cli.test.js +25 -0
  73. package/benchmark/auto-resolve/fixtures/test-repo/tests/server.test.js +58 -0
  74. package/benchmark/auto-resolve/fixtures/test-repo/web/index.html +37 -0
  75. package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +174 -0
  76. package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +256 -0
  77. package/benchmark/auto-resolve/scripts/compile-report.py +331 -0
  78. package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +552 -0
  79. package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +430 -0
  80. package/benchmark/auto-resolve/scripts/judge.sh +359 -0
  81. package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +260 -0
  82. package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +274 -0
  83. package/benchmark/auto-resolve/scripts/oracle-test-fidelity.py +328 -0
  84. package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +401 -0
  85. package/benchmark/auto-resolve/scripts/pair-plan-lint.py +468 -0
  86. package/benchmark/auto-resolve/scripts/run-fixture.sh +691 -0
  87. package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +234 -0
  88. package/benchmark/auto-resolve/scripts/run-suite.sh +214 -0
  89. package/benchmark/auto-resolve/scripts/ship-gate.py +222 -0
  90. package/bin/devlyn.js +175 -17
  91. package/config/skills/_shared/adapters/README.md +64 -0
  92. package/config/skills/_shared/adapters/gpt-5-5.md +29 -0
  93. package/config/skills/_shared/adapters/opus-4-7.md +29 -0
  94. package/config/skills/{devlyn:auto-resolve/scripts → _shared}/archive_run.py +26 -0
  95. package/config/skills/_shared/codex-config.md +54 -0
  96. package/config/skills/_shared/codex-monitored.sh +141 -0
  97. package/config/skills/_shared/engine-preflight.md +35 -0
  98. package/config/skills/_shared/expected.schema.json +93 -0
  99. package/config/skills/_shared/pair-plan-schema.md +298 -0
  100. package/config/skills/_shared/runtime-principles.md +110 -0
  101. package/config/skills/_shared/spec-verify-check.py +519 -0
  102. package/config/skills/devlyn:ideate/SKILL.md +99 -429
  103. package/config/skills/devlyn:ideate/references/elicitation.md +97 -0
  104. package/config/skills/devlyn:ideate/references/from-spec-mode.md +54 -0
  105. package/config/skills/devlyn:ideate/references/project-mode.md +76 -0
  106. package/config/skills/devlyn:ideate/references/spec-template.md +102 -0
  107. package/config/skills/devlyn:resolve/SKILL.md +172 -184
  108. package/config/skills/devlyn:resolve/references/free-form-mode.md +68 -0
  109. package/config/skills/devlyn:resolve/references/phases/build-gate.md +45 -0
  110. package/config/skills/devlyn:resolve/references/phases/cleanup.md +39 -0
  111. package/config/skills/devlyn:resolve/references/phases/implement.md +42 -0
  112. package/config/skills/devlyn:resolve/references/phases/plan.md +42 -0
  113. package/config/skills/devlyn:resolve/references/phases/verify.md +69 -0
  114. package/config/skills/devlyn:resolve/references/state-schema.md +106 -0
  115. package/{config/skills → optional-skills}/devlyn:design-system/SKILL.md +1 -0
  116. package/{config/skills → optional-skills}/devlyn:reap/SKILL.md +1 -0
  117. package/{config/skills → optional-skills}/devlyn:team-design-ui/SKILL.md +5 -0
  118. package/package.json +12 -2
  119. package/scripts/lint-skills.sh +431 -0
  120. package/config/skills/devlyn:auto-resolve/SKILL.md +0 -252
  121. package/config/skills/devlyn:auto-resolve/evals/evals.json +0 -21
  122. package/config/skills/devlyn:auto-resolve/evals/task-doctor-subcommand.md +0 -42
  123. package/config/skills/devlyn:auto-resolve/references/build-gate.md +0 -130
  124. package/config/skills/devlyn:auto-resolve/references/engine-routing.md +0 -82
  125. package/config/skills/devlyn:auto-resolve/references/findings-schema.md +0 -103
  126. package/config/skills/devlyn:auto-resolve/references/phases/phase-1-build.md +0 -54
  127. package/config/skills/devlyn:auto-resolve/references/phases/phase-2-evaluate.md +0 -45
  128. package/config/skills/devlyn:auto-resolve/references/phases/phase-3-critic.md +0 -84
  129. package/config/skills/devlyn:auto-resolve/references/pipeline-routing.md +0 -114
  130. package/config/skills/devlyn:auto-resolve/references/pipeline-state.md +0 -201
  131. package/config/skills/devlyn:auto-resolve/scripts/terminal_verdict.py +0 -96
  132. package/config/skills/devlyn:browser-validate/SKILL.md +0 -164
  133. package/config/skills/devlyn:browser-validate/references/flow-testing.md +0 -118
  134. package/config/skills/devlyn:browser-validate/references/tier1-chrome.md +0 -137
  135. package/config/skills/devlyn:browser-validate/references/tier2-playwright.md +0 -195
  136. package/config/skills/devlyn:browser-validate/references/tier3-curl.md +0 -57
  137. package/config/skills/devlyn:clean/SKILL.md +0 -285
  138. package/config/skills/devlyn:design-ui/SKILL.md +0 -351
  139. package/config/skills/devlyn:discover-product/SKILL.md +0 -124
  140. package/config/skills/devlyn:evaluate/SKILL.md +0 -564
  141. package/config/skills/devlyn:feature-spec/SKILL.md +0 -630
  142. package/config/skills/devlyn:ideate/references/challenge-rubric.md +0 -122
  143. package/config/skills/devlyn:ideate/references/codex-critic-template.md +0 -42
  144. package/config/skills/devlyn:ideate/references/templates/item-spec.md +0 -90
  145. package/config/skills/devlyn:implement-ui/SKILL.md +0 -466
  146. package/config/skills/devlyn:preflight/SKILL.md +0 -355
  147. package/config/skills/devlyn:preflight/references/auditors/browser-auditor.md +0 -32
  148. package/config/skills/devlyn:preflight/references/auditors/code-auditor.md +0 -86
  149. package/config/skills/devlyn:preflight/references/auditors/docs-auditor.md +0 -38
  150. package/config/skills/devlyn:product-spec/SKILL.md +0 -603
  151. package/config/skills/devlyn:recommend-features/SKILL.md +0 -286
  152. package/config/skills/devlyn:review/SKILL.md +0 -161
  153. package/config/skills/devlyn:team-resolve/SKILL.md +0 -631
  154. package/config/skills/devlyn:team-review/SKILL.md +0 -493
  155. package/config/skills/devlyn:update-docs/SKILL.md +0 -463
  156. package/config/skills/workflow-routing/SKILL.md +0 -73
  157. /package/{config/skills → optional-skills}/devlyn:reap/scripts/reap.sh +0 -0
  158. /package/{config/skills → optional-skills}/devlyn:reap/scripts/scan.sh +0 -0
@@ -0,0 +1,468 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ pair-plan-lint.py — validate a `pair-plan.json` against its registry and
4
+ the `pair-plan-schema.md` hard rules.
5
+
6
+ Inputs:
7
+ --plan <path> path to pair-plan.json (required)
8
+ --registry <path> override registry path (default: resolved from
9
+ plan.source.canonical_id_registry_path)
10
+ --quiet suppress stderr human summary
11
+
12
+ Output:
13
+ stdout — `{"ok": bool, "errors": [{code, message, ...}, ...]}` (machine-readable)
14
+ stderr — short human summary unless --quiet (skipped on --quiet)
15
+
16
+ Exit code:
17
+ 0 on pass, 1 on fail.
18
+
19
+ Schema source: config/skills/_shared/pair-plan-schema.md (iter-0022 ship).
20
+ """
21
+ import argparse
22
+ import copy
23
+ import hashlib
24
+ import json
25
+ import pathlib
26
+ import sys
27
+
28
# Schema revision accepted by this linter; compared against plan["schema_version"].
SCHEMA_VERSION = "1"

# A plan's `authority_order` must equal this list exactly (order included).
AUTHORITY_ORDER_CANONICAL = ["spec.md", "expected.json/rubric", "phase prompt", "model preference"]

# Keys that must be present at the root of pair-plan.json.
REQUIRED_TOP_LEVEL = [
    "schema_version", "plan_status", "planning_mode",
    "source", "authority_order", "rounds",
    "accepted_invariants", "rejected_alternatives",
    "unresolved", "escalated_to_user",
    "model_stamps",
]

# Field names under plan["source"]. NOTE(review): not referenced by any check
# visible in this file — confirm whether another consumer relies on it.
REQUIRED_SOURCE_FIELDS = [
    "spec_path", "spec_sha256",
    "rubric_path", "rubric_sha256",
    "canonical_id_registry_path", "canonical_id_registry_sha256",
]
56
+
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # JSON loading with strict-keys.
60
+ # ---------------------------------------------------------------------------
61
+ def _strict_pairs(pairs):
62
+ keys = [k for k, _ in pairs]
63
+ if len(keys) != len(set(keys)):
64
+ raise ValueError("duplicate key in pair-plan.json")
65
+ return dict(pairs)
66
+
67
+
68
def load_strict_json(path):
    """Read the UTF-8 JSON document at `path`, rejecting duplicate object keys.

    Duplicate keys surface as the ValueError raised by `_strict_pairs`;
    malformed JSON surfaces as json.JSONDecodeError.
    """
    with open(path, "r", encoding="utf-8") as handle:
        text = handle.read()
    return json.loads(text, object_pairs_hook=_strict_pairs)
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Hash helpers.
75
+ # ---------------------------------------------------------------------------
76
def file_sha256(path):
    """Return the hex SHA-256 digest of the raw bytes stored at `path`."""
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # Feed the hash in fixed-size chunks; the digest equals hashing the
        # whole file at once.
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
79
+
80
+
81
def canonical_pre_stamp_sha256(plan):
    """Hash the plan as it looked before any model stamped it.

    A deep copy of `plan` gets `model_stamps` reset to `{}`, is serialized
    as compact JSON with sorted keys (non-ASCII kept verbatim, NaN/Infinity
    rejected), and the UTF-8 bytes are hashed. The caller's plan is not
    modified. Returns the hex digest.
    """
    unstamped = copy.deepcopy(plan)
    unstamped["model_stamps"] = {}
    canonical_text = json.dumps(
        unstamped,
        allow_nan=False,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
    )
    hasher = hashlib.sha256()
    hasher.update(canonical_text.encode("utf-8"))
    return hasher.hexdigest()
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Individual checks (each returns a list of error dicts).
96
+ # ---------------------------------------------------------------------------
97
# Allowed values for plan["plan_status"].
PLAN_STATUS_VALID = {"blocked", "draft", "final"}
# Allowed values for plan["planning_mode"].
PLANNING_MODE_VALID = {"pair", "solo"}
# Fields every accepted_invariants[] item must carry, in reporting order.
ACCEPTED_INVARIANT_REQUIRED_FIELDS = (
    "id",
    "source_refs",
    "operational_check",
    "authority",
)
100
+
101
+
102
def check_top_level_shape(plan):
    """Validate the root of a parsed pair plan.

    Returns a list of error dicts, empty when the shape is acceptable:
    - a non-object root yields a single `plan_not_object` error;
    - each name in REQUIRED_TOP_LEVEL missing from the plan yields
      `missing_top_level_field`;
    - `schema_version`, `plan_status` and `planning_mode` are checked
      against SCHEMA_VERSION, PLAN_STATUS_VALID and PLANNING_MODE_VALID.
    """
    if not isinstance(plan, dict):
        return [{"code": "plan_not_object", "message": "pair-plan.json root must be a JSON object"}]
    errs = [
        {"code": "missing_top_level_field", "field": f,
         "message": f"required top-level field missing: {f}"}
        for f in REQUIRED_TOP_LEVEL
        if f not in plan
    ]
    version = plan.get("schema_version")
    if version != SCHEMA_VERSION:
        errs.append({"code": "schema_version_mismatch",
                     "expected": SCHEMA_VERSION,
                     "got": version,
                     "message": f"schema_version must be {SCHEMA_VERSION!r}"})
    status = plan.get("plan_status")
    # Only strings can be valid. Checking the type first also stops an
    # unhashable JSON value (array/object) from raising TypeError in the
    # set-membership test.
    if not isinstance(status, str) or status not in PLAN_STATUS_VALID:
        errs.append({"code": "plan_status_invalid",
                     "got": status,
                     "message": f"plan_status must be one of {sorted(PLAN_STATUS_VALID)}"})
    mode = plan.get("planning_mode")
    if not isinstance(mode, str) or mode not in PLANNING_MODE_VALID:
        errs.append({"code": "planning_mode_invalid",
                     "got": mode,
                     "message": f"planning_mode must be one of {sorted(PLANNING_MODE_VALID)}"})
    return errs
124
+
125
+
126
def check_accepted_invariants_shape(plan):
    """Check that every accepted_invariants[] item is a complete object.

    Each item must be a JSON object carrying every field named in
    ACCEPTED_INVARIANT_REQUIRED_FIELDS with a value other than None, "" or [].
    Returns one error dict per non-object item and one per missing/empty field.
    """
    problems = []
    invariants = plan.get("accepted_invariants") or []
    for position, entry in enumerate(invariants):
        if isinstance(entry, dict):
            for field in ACCEPTED_INVARIANT_REQUIRED_FIELDS:
                absent = field not in entry
                if absent or entry[field] in (None, "", []):
                    problems.append({
                        "code": "accepted_invariant_missing_field",
                        "index": position,
                        "id": entry.get("id"),
                        "field": field,
                        "message": f"accepted_invariants[{position}].{field} missing or empty",
                    })
        else:
            problems.append({
                "code": "accepted_invariant_not_object",
                "index": position,
                "message": f"accepted_invariants[{position}] is not a JSON object",
            })
    return problems
144
+
145
+
146
def check_registry_shape(registry):
    """Validate the structure of a parsed canonical-id registry.

    Returns a list of error dicts (empty when the registry is well-formed):
    - `registry_not_object` when the root is not a JSON object;
    - `registry_unknown_field` for top-level keys outside the allowed set;
    - `registry_required_invariants_not_list` when that field is not an array;
    - `registry_entry_invalid` for each entry that is not an object with a
      string `id`;
    - `registry_unsorted` when the string ids are not in lexicographic order.

    Malformed input is reported as errors rather than raising, so the
    linter never dies with a traceback on a bad registry.
    """
    if not isinstance(registry, dict):
        return [{"code": "registry_not_object",
                 "message": "canonical_id_registry.json root must be a JSON object"}]
    errs = []
    allowed = {"schema_version", "fixture_id", "generated_at", "generated_from", "required_invariants"}
    extra = sorted(set(registry) - allowed)
    if extra:
        errs.append({"code": "registry_unknown_field",
                     "fields": extra,
                     "message": f"canonical_id_registry.json has unknown top-level fields: {extra}"})
    entries = registry.get("required_invariants") or []
    if not isinstance(entries, list):
        errs.append({"code": "registry_required_invariants_not_list",
                     "message": "required_invariants must be a JSON array"})
        return errs
    ids = []
    for i, entry in enumerate(entries):
        rid = entry.get("id") if isinstance(entry, dict) else None
        if not isinstance(rid, str):
            # Missing or non-string ids cannot be ordered lexicographically;
            # report them here and keep them out of the sort comparison.
            errs.append({"code": "registry_entry_invalid",
                         "index": i,
                         "message": f"required_invariants[{i}] must be an object with a string id"})
            continue
        ids.append(rid)
    if ids != sorted(ids):
        errs.append({"code": "registry_unsorted",
                     "message": "required_invariants[] must be sorted lexicographically by id"})
    return errs
161
+
162
+
163
def check_authority_order(plan):
    """Require plan["authority_order"] to equal AUTHORITY_ORDER_CANONICAL.

    Returns an empty list on an exact match, otherwise a single
    `authority_order_drift` error carrying the expected and observed values.
    """
    observed = plan.get("authority_order")
    if observed == AUTHORITY_ORDER_CANONICAL:
        return []
    drift = {
        "code": "authority_order_drift",
        "expected": AUTHORITY_ORDER_CANONICAL,
        "got": observed,
        "message": "authority_order must be the canonical 4-string snapshot",
    }
    return [drift]
170
+
171
+
172
def check_unresolved_status(plan):
    """Reject plans that still list unresolved items without being blocked/draft.

    Returns a single `unresolved_with_final_status` error when `unresolved`
    is non-empty and `plan_status` is neither "blocked" nor "draft";
    otherwise an empty list.
    """
    pending = plan.get("unresolved") or []
    state = plan.get("plan_status")
    count = len(pending)
    if count == 0 or state in ("blocked", "draft"):
        return []
    return [{
        "code": "unresolved_with_final_status",
        "unresolved_count": count,
        "plan_status": state,
        "message": "unresolved is non-empty but plan_status is not 'blocked' or 'draft'",
    }]
181
+
182
+
183
def check_escalated_status(plan):
    """Require a user_resolution on every escalated item once the plan is past draft/blocked.

    Plans whose `plan_status` is "blocked" or "draft" are exempt and always
    return an empty list. Otherwise each `escalated_to_user[]` item must be a
    JSON object containing `user_resolution`:
    - `escalated_item_not_object` is reported for non-object items (these
      would otherwise crash on `.get` or be substring-tested if a string);
    - `escalated_without_resolution` is reported for objects lacking the key.
    """
    escalated = plan.get("escalated_to_user") or []
    status = plan.get("plan_status")
    if not escalated or status in ("blocked", "draft"):
        return []
    errs = []
    for i, item in enumerate(escalated):
        if not isinstance(item, dict):
            errs.append({"code": "escalated_item_not_object",
                         "index": i,
                         "message": f"escalated_to_user[{i}] is not a JSON object"})
            continue
        if "user_resolution" not in item:
            errs.append({"code": "escalated_without_resolution",
                         "index": i,
                         "id": item.get("id"),
                         "message": "escalated_to_user[] item missing user_resolution while plan_status is final"})
    return errs
197
+
198
+
199
def check_planning_mode_rounds(plan):
    """Cross-check `planning_mode` against the number of recorded rounds.

    "pair" plans need at least one round and "solo" plans need none; any
    other mode is not checked here. Returns a one-element error list on
    violation, otherwise an empty list.
    """
    mode = plan.get("planning_mode")
    round_count = len(plan.get("rounds") or [])
    if mode == "pair":
        if round_count < 1:
            return [{"code": "pair_mode_no_rounds",
                     "message": "planning_mode=pair requires rounds.length >= 1"}]
    elif mode == "solo":
        if round_count != 0:
            return [{"code": "solo_mode_with_rounds",
                     "message": "planning_mode=solo requires rounds.length == 0"}]
    return []
209
+
210
+
211
def check_accepted_ids_in_registry(plan, registry):
    """Report accepted invariants whose id is not listed in the registry.

    Returns one `accepted_id_not_in_registry` error per accepted item whose
    `id` does not appear among `registry["required_invariants"][].id`.

    Malformed input never raises: registry entries that are not objects or
    lack `id` are ignored, non-object accepted items are skipped (the
    accepted-invariants shape check reports those), and an unhashable id
    is treated as "not in registry".
    """
    entries = registry.get("required_invariants") if isinstance(registry, dict) else None
    registry_ids = set()
    for entry in entries or []:
        if isinstance(entry, dict) and "id" in entry:
            try:
                registry_ids.add(entry["id"])
            except TypeError:
                # Unhashable id (array/object): it can never equal a lookup key.
                pass
    errs = []
    for i, item in enumerate(plan.get("accepted_invariants") or []):
        if not isinstance(item, dict):
            continue
        rid = item.get("id")
        try:
            known = rid in registry_ids
        except TypeError:
            known = False
        if not known:
            errs.append({"code": "accepted_id_not_in_registry",
                         "index": i,
                         "id": rid,
                         "message": f"accepted_invariants[{i}].id={rid!r} not present in registry"})
    return errs
222
+
223
+
224
def check_registry_coverage(plan, registry):
    """Every registry required_invariants[].id must be accounted for in a final plan.

    Coverage is enforced only when plan_status == "final"; any other status
    returns an empty list. An id counts as accounted for when it appears as
    an `accepted_invariants[].id`, inside some
    `rejected_alternatives[].conflicts_with_ids`, as an
    `escalated_to_user[].id`, or as an `unresolved[].id`.

    Returns one `missing_required_id` error per uncovered registry id.
    Malformed input never raises: non-object items, registry entries
    without `id`, and non-list `conflicts_with_ids` are ignored, and
    unhashable ids are treated as uncovered.
    """
    if plan.get("plan_status") != "final":
        return []

    accounted = set()

    def _account(value):
        # Unhashable values (arrays/objects) cannot match a registry id.
        try:
            accounted.add(value)
        except TypeError:
            pass

    for field in ("accepted_invariants", "escalated_to_user", "unresolved"):
        for item in plan.get(field) or []:
            if isinstance(item, dict):
                _account(item.get("id"))
    for item in plan.get("rejected_alternatives") or []:
        if not isinstance(item, dict):
            continue
        conflicts = item.get("conflicts_with_ids")
        if isinstance(conflicts, list):
            for cid in conflicts:
                _account(cid)

    entries = registry.get("required_invariants") if isinstance(registry, dict) else None
    errs = []
    for entry in entries or []:
        if not isinstance(entry, dict) or "id" not in entry:
            continue
        rid = entry["id"]
        try:
            covered = rid in accounted
        except TypeError:
            covered = False
        if not covered:
            errs.append({"code": "missing_required_id",
                         "id": rid,
                         "message": f"registry id {rid!r} is not in accepted_invariants, rejected_alternatives.conflicts_with_ids, escalated_to_user, or unresolved"})
    return errs
250
+
251
+
252
def check_model_stamps(plan):
    """Validate the `claude` and `codex` entries under plan["model_stamps"].

    For each of the two stamps:
    - `stamp_missing` when the stamp is absent or not a JSON object;
    - `stamp_status_invalid` when `status` is neither "sign" nor "block";
    - `stamp_sha_missing` when `signed_plan_sha256` is absent.
    When plan_status == "final", each stamp whose status is not "sign"
    additionally yields `final_with_non_sign_stamp`.

    A non-object `model_stamps`, or a non-object individual stamp, is
    treated as missing instead of raising AttributeError.
    """
    stamps = plan.get("model_stamps")
    if not isinstance(stamps, dict):
        stamps = {}
    errs = []
    for who in ("claude", "codex"):
        s = stamps.get(who)
        if not isinstance(s, dict):
            errs.append({"code": "stamp_missing", "who": who,
                         "message": f"model_stamps.{who} missing or not an object"})
            continue
        if s.get("status") not in ("sign", "block"):
            errs.append({"code": "stamp_status_invalid", "who": who,
                         "got": s.get("status"),
                         "message": f"model_stamps.{who}.status must be 'sign' or 'block'"})
        if "signed_plan_sha256" not in s:
            errs.append({"code": "stamp_sha_missing", "who": who,
                         "message": f"model_stamps.{who}.signed_plan_sha256 missing"})
    if plan.get("plan_status") == "final":
        for who in ("claude", "codex"):
            s = stamps.get(who)
            # A stamp that is not an object has no status; report it as None.
            status = s.get("status") if isinstance(s, dict) else None
            if status != "sign":
                errs.append({"code": "final_with_non_sign_stamp",
                             "who": who,
                             "status": status,
                             "message": f"plan_status=final requires model_stamps.{who}.status=sign"})
    return errs
277
+
278
+
279
def check_signed_pre_stamp_sha(plan):
    """Check that both model stamps signed the canonical pre-stamp hash.

    The expected value comes from `canonical_pre_stamp_sha256(plan)`.
    - `stamp_sha_disagree` when the claude and codex `signed_plan_sha256`
      values differ from each other;
    - `stamp_sha_mismatch_canonical` for each stamp whose sha is present
      but does not equal the expected hash.
    A missing sha is not reported here. A non-object `model_stamps` or
    individual stamp is treated as carrying no sha instead of raising.
    """
    expected = canonical_pre_stamp_sha256(plan)
    stamps = plan.get("model_stamps")
    if not isinstance(stamps, dict):
        stamps = {}
    shas = {}
    for who in ("claude", "codex"):
        stamp = stamps.get(who)
        shas[who] = stamp.get("signed_plan_sha256") if isinstance(stamp, dict) else None
    claude_sha = shas["claude"]
    codex_sha = shas["codex"]
    errs = []
    if claude_sha != codex_sha:
        errs.append({"code": "stamp_sha_disagree",
                     "claude_sha": claude_sha,
                     "codex_sha": codex_sha,
                     "message": "model_stamps.{claude,codex}.signed_plan_sha256 must be byte-identical"})
    for who, sha in (("claude", claude_sha), ("codex", codex_sha)):
        if sha is not None and sha != expected:
            errs.append({"code": "stamp_sha_mismatch_canonical",
                         "who": who,
                         "got": sha,
                         "expected": expected,
                         "message": f"model_stamps.{who}.signed_plan_sha256 does not equal canonical pre-stamp sha"})
    return errs
298
+
299
+
300
def check_source_file_shas(plan, plan_path):
    """Each source.<x>_sha256 must equal the raw-bytes sha256 of the referenced file.

    Checks the spec, expected, rubric and canonical-id-registry path/sha
    pairs under plan["source"]. `expected_path` may be omitted entirely;
    the other three paths are required. Paths are resolved with
    `_resolve_repo_path` relative to the directory containing `plan_path`.

    Returns a list of error dicts with codes:
    - `source_path_missing` / `source_sha_missing` for absent fields;
    - `source_path_unreadable` when the path is not a string, does not
      resolve to an existing file, or cannot be read;
    - `source_sha_drift` when the recorded sha differs from the file's.
    """
    src = plan.get("source")
    if not isinstance(src, dict):
        src = {}
    plan_dir = pathlib.Path(plan_path).resolve().parent
    errs = []
    pairs = [
        ("spec_path", "spec_sha256"),
        ("expected_path", "expected_sha256"),
        ("rubric_path", "rubric_sha256"),
        ("canonical_id_registry_path", "canonical_id_registry_sha256"),
    ]
    for path_field, sha_field in pairs:
        if path_field not in src:
            # expected_path is optional only when expected.json is genuinely
            # absent; rubric/registry/spec are always required.
            if path_field != "expected_path":
                errs.append({"code": "source_path_missing",
                             "field": path_field,
                             "message": f"source.{path_field} missing"})
            continue
        if sha_field not in src:
            errs.append({"code": "source_sha_missing",
                         "field": sha_field,
                         "message": f"source.{sha_field} missing"})
            continue
        path_str = src[path_field]
        path_abs = _resolve_repo_path(path_str, plan_dir) if isinstance(path_str, str) else None
        if path_abs is None:
            # Report the configured value: there is no resolved path to show.
            errs.append({"code": "source_path_unreadable",
                         "field": path_field,
                         "path": path_str,
                         "message": f"source.{path_field} resolves to a path that does not exist"})
            continue
        try:
            actual = file_sha256(path_abs)
        except OSError as e:
            # e.g. the path is a directory or is not readable by this user.
            errs.append({"code": "source_path_unreadable",
                         "field": path_field,
                         "path": str(path_abs),
                         "message": f"source.{path_field} could not be read: {e}"})
            continue
        if actual != src[sha_field]:
            errs.append({"code": "source_sha_drift",
                         "field": sha_field,
                         "expected": src[sha_field],
                         "actual": actual,
                         "path": str(path_abs),
                         "message": f"source.{sha_field} does not match raw-bytes sha of {path_str}"})
    return errs
343
+
344
+
345
+ # ---------------------------------------------------------------------------
346
+ # Path resolution — repo-relative paths in plan/source live in the repo,
347
+ # but the plan itself can be anywhere (e.g. /tmp during tests). Try four
348
+ # anchors in order: absolute path → git-root ancestor of plan_dir → cwd → plan_dir.
349
+ # Returns the first existing absolute path, or None if no anchor yields one.
350
+ # ---------------------------------------------------------------------------
351
+ def _git_root_ancestor(start):
352
+ s = pathlib.Path(start).resolve()
353
+ while s != s.parent:
354
+ if (s / ".git").exists():
355
+ return s
356
+ s = s.parent
357
+ return None
358
+
359
+
360
+ def _resolve_repo_path(path_str, plan_dir):
361
+ p = pathlib.Path(path_str)
362
+ if p.is_absolute():
363
+ return p if p.exists() else None
364
+ candidates = []
365
+ repo_root = _git_root_ancestor(plan_dir)
366
+ if repo_root is not None:
367
+ candidates.append((repo_root / p).resolve())
368
+ candidates.append((pathlib.Path.cwd() / p).resolve())
369
+ candidates.append((plan_dir / p).resolve())
370
+ seen = set()
371
+ for c in candidates:
372
+ if c in seen:
373
+ continue
374
+ seen.add(c)
375
+ if c.exists():
376
+ return c
377
+ return None
378
+
379
+
380
def resolve_registry_from_plan(plan, plan_path):
    """Locate the registry file named by plan["source"]["canonical_id_registry_path"].

    Returns None when the plan does not name a registry; otherwise returns
    whatever `_resolve_repo_path` yields for that path, anchored at the
    directory containing `plan_path`.
    """
    source = plan.get("source") or {}
    registry_ref = source.get("canonical_id_registry_path")
    if registry_ref is None:
        return None
    anchor_dir = pathlib.Path(plan_path).resolve().parent
    return _resolve_repo_path(registry_ref, anchor_dir)
387
+
388
+
389
+ # ---------------------------------------------------------------------------
390
+ # Top-level lint.
391
+ # ---------------------------------------------------------------------------
392
def lint(plan_path, registry_override=None):
    """Lint the pair plan at `plan_path` and return a verdict dict.

    The verdict always has `ok` (bool) and `errors` (list of error dicts);
    when the full set of checks ran it also carries `registry_path`.

    Flow:
    1. Load the plan with duplicate-key detection. Read and parse failures
       return immediately with a single error.
    2. Validate the top-level shape; shape errors skip the remaining checks.
    3. Resolve the registry from `registry_override` if given, else from
       the plan's `source.canonical_id_registry_path`, and load it.
    4. Run every individual check and aggregate their errors.

    I/O and decoding problems on either file are reported as errors rather
    than propagating as exceptions.
    """
    try:
        plan = load_strict_json(plan_path)
    except json.JSONDecodeError as e:
        return {"ok": False, "errors": [{"code": "plan_invalid_json",
                                         "message": f"plan parse error: {e}"}]}
    except UnicodeDecodeError as e:
        # UnicodeDecodeError subclasses ValueError; catch it first so bad
        # bytes are not misreported as duplicate keys.
        return {"ok": False, "errors": [{"code": "plan_invalid_json",
                                         "message": f"plan parse error: {e}"}]}
    except ValueError as e:
        return {"ok": False, "errors": [{"code": "plan_duplicate_keys",
                                         "message": str(e)}]}
    except FileNotFoundError:
        return {"ok": False, "errors": [{"code": "plan_not_found",
                                         "message": f"plan file not found: {plan_path}"}]}
    except OSError as e:
        # e.g. the path is a directory or is not readable.
        return {"ok": False, "errors": [{"code": "plan_unreadable",
                                         "message": f"plan file could not be read: {e}"}]}

    # top-level shape first; skip downstream checks if shape is broken
    shape_errs = check_top_level_shape(plan)
    if shape_errs:
        return {"ok": False, "errors": shape_errs}

    # resolve registry
    if registry_override:
        registry_path = pathlib.Path(registry_override).resolve()
    else:
        registry_path = resolve_registry_from_plan(plan, plan_path)
    if registry_path is None or not registry_path.exists():
        return {"ok": False, "errors": [{
            "code": "registry_unreachable",
            "registry_path": str(registry_path) if registry_path else None,
            "message": "could not resolve canonical_id_registry; provide --registry or fix source.canonical_id_registry_path",
        }]}
    try:
        registry = load_strict_json(registry_path)
    except (ValueError, OSError) as e:
        # ValueError covers JSON syntax errors, duplicate keys and bad bytes.
        return {"ok": False, "errors": [{"code": "registry_invalid",
                                         "registry_path": str(registry_path),
                                         "message": str(e)}]}
    if not isinstance(registry, dict):
        # The registry checks below all assume an object root.
        return {"ok": False, "errors": [{"code": "registry_invalid",
                                         "registry_path": str(registry_path),
                                         "message": "canonical_id_registry root must be a JSON object"}]}

    errors = []
    errors += check_registry_shape(registry)
    errors += check_authority_order(plan)
    errors += check_accepted_invariants_shape(plan)
    errors += check_unresolved_status(plan)
    errors += check_escalated_status(plan)
    errors += check_planning_mode_rounds(plan)
    errors += check_accepted_ids_in_registry(plan, registry)
    errors += check_registry_coverage(plan, registry)
    errors += check_model_stamps(plan)
    errors += check_signed_pre_stamp_sha(plan)
    errors += check_source_file_shas(plan, plan_path)

    return {"ok": not errors, "errors": errors,
            "registry_path": str(registry_path)}
443
+
444
+
445
def main():
    """Command-line entry point.

    Parses --plan / --registry / --quiet, runs `lint`, prints the verdict
    as JSON on stdout, writes a short human summary to stderr unless
    --quiet was given, and exits with 0 on pass or 1 on fail.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--plan", required=True)
    parser.add_argument(
        "--registry",
        default=None,
        help="Override registry path (default: source.canonical_id_registry_path)",
    )
    parser.add_argument("--quiet", action="store_true")
    opts = parser.parse_args()

    verdict = lint(opts.plan, registry_override=opts.registry)
    passed = verdict["ok"]

    # Machine-readable verdict always goes to stdout.
    print(json.dumps(verdict, indent=2, sort_keys=True))

    if not opts.quiet:
        if passed:
            summary = [f"pair-plan-lint: PASS — {opts.plan} (registry: {verdict.get('registry_path')})"]
        else:
            summary = [f"pair-plan-lint: FAIL — {opts.plan} ({len(verdict['errors'])} error(s))"]
            summary.extend(
                f"  [{err.get('code')}] {err.get('message')}" for err in verdict["errors"]
            )
        for line in summary:
            print(line, file=sys.stderr)

    sys.exit(0 if passed else 1)
465
+
466
+
467
+ if __name__ == "__main__":
468
+ main()