lgtm-specs 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/.claude/settings.local.json +14 -0
  2. package/.gemini/README.md +8 -0
  3. package/.gemini/config.yaml +20 -0
  4. package/.gemini/styleguide.md +35 -0
  5. package/.github/workflows/README.md +5 -0
  6. package/.github/workflows/release.yml +52 -0
  7. package/.github/workflows/validate.yml +27 -0
  8. package/.prettierignore +4 -0
  9. package/.prettierrc +1 -0
  10. package/AGENTS.md +151 -0
  11. package/README.md +98 -0
  12. package/VERSION +1 -0
  13. package/agents/README.md +73 -0
  14. package/agents/modes/README.md +9 -0
  15. package/agents/modes/build.md +88 -0
  16. package/agents/modes/hack.md +76 -0
  17. package/agents/modes/review.md +79 -0
  18. package/agents/roles/builder.md +75 -0
  19. package/agents/roles/counsel.md +96 -0
  20. package/agents/roles/explorer.md +77 -0
  21. package/agents/roles/lead.md +76 -0
  22. package/agents/roles/librarian.md +63 -0
  23. package/agents/roles/planner.md +75 -0
  24. package/agents/roles/reviewer/BASE.md +9 -0
  25. package/agents/roles/reviewer/OUTPUT_FORMAT.md +4 -0
  26. package/agents/roles/reviewer/README.md +48 -0
  27. package/agents/roles/reviewer/lite.md +51 -0
  28. package/agents/roles/reviewer/logic.md +48 -0
  29. package/agents/roles/reviewer/performance.md +45 -0
  30. package/agents/roles/reviewer/plan.md +52 -0
  31. package/agents/roles/reviewer/quality.md +49 -0
  32. package/agents/roles/reviewer/security.md +47 -0
  33. package/agents/roles/reviewer/test.md +48 -0
  34. package/agents/templates/README.md +6 -0
  35. package/agents/templates/mode.md +33 -0
  36. package/agents/templates/role.md +73 -0
  37. package/contribute/README.md +24 -0
  38. package/contribute/add-agent.md +29 -0
  39. package/contribute/add-ci.md +31 -0
  40. package/contribute/add-constitution.md +17 -0
  41. package/contribute/add-law.md +20 -0
  42. package/contribute/add-policy.md +27 -0
  43. package/contribute/checklist.md +42 -0
  44. package/contribute/maintenance.md +19 -0
  45. package/contribute/update-models.md +47 -0
  46. package/docs/README.md +13 -0
  47. package/docs/adr/0001-knowledge-engineering-workflow.md +22 -0
  48. package/docs/adr/0002-rule-hierarchy.md +25 -0
  49. package/docs/adr/0003-atomic-knowledge-graph.md +21 -0
  50. package/docs/adr/0004-identification-schema.md +22 -0
  51. package/docs/adr/0005-agent-specialization.md +39 -0
  52. package/docs/adr/0006-git-workflow-integrity.md +34 -0
  53. package/docs/adr/0007-operating-modes-and-gates.md +54 -0
  54. package/docs/adr/0008-rules-vs-workflows-boundary.md +64 -0
  55. package/docs/adr/README.md +14 -0
  56. package/docs/agent_architecture.md +164 -0
  57. package/docs/context_lifecycle.md +228 -0
  58. package/docs/engineering_principles.md +128 -0
  59. package/docs/local_policies.md +59 -0
  60. package/docs/meta/collaborative_dynamics.md +142 -0
  61. package/docs/meta/domains/README.md +8 -0
  62. package/docs/meta/domains/bitcoin/01-units.md +21 -0
  63. package/docs/meta/domains/bitcoin/02-broadcast-cancellation.md +20 -0
  64. package/docs/meta/domains/bitcoin/03-fee-rates-rounding.md +21 -0
  65. package/docs/meta/domains/bitcoin/04-confirmations-reorgs.md +20 -0
  66. package/docs/meta/domains/bitcoin/05-address-gap-limit.md +16 -0
  67. package/docs/meta/domains/bitcoin/06-relay-policy.md +27 -0
  68. package/docs/meta/domains/bitcoin/README.md +12 -0
  69. package/docs/meta/domains/git/01-workflow.md +89 -0
  70. package/docs/meta/domains/git/02-commits.md +57 -0
  71. package/docs/meta/domains/git/03-collaboration.md +40 -0
  72. package/docs/meta/domains/git/04-integrity.md +26 -0
  73. package/docs/meta/domains/git/05-configuration.md +209 -0
  74. package/docs/meta/domains/git/06-advanced.md +130 -0
  75. package/docs/meta/domains/git/README.md +29 -0
  76. package/docs/meta/industry_best_practices.md +555 -0
  77. package/docs/meta/languages/README.md +8 -0
  78. package/docs/meta/languages/go/01-concurrency.md +37 -0
  79. package/docs/meta/languages/go/02-api-design.md +30 -0
  80. package/docs/meta/languages/go/03-resilience.md +27 -0
  81. package/docs/meta/languages/go/04-errors.md +27 -0
  82. package/docs/meta/languages/go/05-performance.md +18 -0
  83. package/docs/meta/languages/go/06-safety.md +18 -0
  84. package/docs/meta/languages/go/07-testing.md +44 -0
  85. package/docs/meta/languages/go/08-config-layout.md +23 -0
  86. package/docs/meta/languages/go/README.md +14 -0
  87. package/docs/meta/languages/typescript/01-strictness.md +19 -0
  88. package/docs/meta/languages/typescript/02-immutability.md +15 -0
  89. package/docs/meta/languages/typescript/03-async.md +18 -0
  90. package/docs/meta/languages/typescript/04-design.md +19 -0
  91. package/docs/meta/languages/typescript/05-control-flow.md +11 -0
  92. package/docs/meta/languages/typescript/README.md +11 -0
  93. package/docs/meta/workflow.md +68 -0
  94. package/docs/philosophy.md +36 -0
  95. package/integrate/README.md +459 -0
  96. package/integrate/versioning.md +41 -0
  97. package/models/README.md +68 -0
  98. package/models/registry.yaml +55 -0
  99. package/package.json +11 -0
  100. package/rules/README.md +57 -0
  101. package/rules/RULE-00000-EXAMPLE.md +29 -0
  102. package/rules/constitution/CONS-00001-srp.md +40 -0
  103. package/rules/constitution/CONS-00002-ocp.md +43 -0
  104. package/rules/constitution/CONS-00003-lsp.md +44 -0
  105. package/rules/constitution/CONS-00004-isp.md +46 -0
  106. package/rules/constitution/CONS-00005-dip.md +37 -0
  107. package/rules/constitution/CONS-00006-dry.md +45 -0
  108. package/rules/constitution/CONS-00007-demeter.md +35 -0
  109. package/rules/constitution/CONS-00008-composition.md +44 -0
  110. package/rules/constitution/CONS-00009-deep-modules.md +39 -0
  111. package/rules/constitution/CONS-00010-kiss.md +47 -0
  112. package/rules/constitution/CONS-00011-yagni.md +49 -0
  113. package/rules/constitution/CONS-00012-cognitive-limits.md +28 -0
  114. package/rules/constitution/CONS-00013-boy-scout.md +27 -0
  115. package/rules/constitution/CONS-00014-broken-windows.md +35 -0
  116. package/rules/constitution/CONS-00015-safety.md +46 -0
  117. package/rules/constitution/CONS-00016-cqs.md +39 -0
  118. package/rules/constitution/CONS-00017-postel.md +35 -0
  119. package/rules/constitution/CONS-00018-cap.md +35 -0
  120. package/rules/constitution/CONS-00019-fallacies.md +37 -0
  121. package/rules/constitution/CONS-00020-shift-left.md +28 -0
  122. package/rules/constitution/CONS-00021-congruence.md +28 -0
  123. package/rules/constitution/CONS-00022-orthogonality.md +40 -0
  124. package/rules/constitution/CONS-00023-determinism.md +38 -0
  125. package/rules/constitution/CONS-00024-security.md +42 -0
  126. package/rules/constitution/CONS-00025-efficiency.md +38 -0
  127. package/rules/constitution/CONS-00026-resilience.md +41 -0
  128. package/rules/constitution/CONS-00027-transparency.md +40 -0
  129. package/rules/constitution/CONS-00028-evolvability.md +36 -0
  130. package/rules/constitution/CONS-00029-operability.md +36 -0
  131. package/rules/constitution/CONS-00030-rework-cycle.md +27 -0
  132. package/rules/constitution/CONS-00031-checklist.md +28 -0
  133. package/rules/constitution/CONS-00032-documentation.md +39 -0
  134. package/rules/constitution/README.md +52 -0
  135. package/rules/laws/README.md +15 -0
  136. package/rules/laws/bitcoin/BTC-00001-amounts-as-satoshis.md +39 -0
  137. package/rules/laws/bitcoin/BTC-00002-broadcast-not-cancelable.md +36 -0
  138. package/rules/laws/bitcoin/BTC-00003-fee-rate-math-rounding.md +37 -0
  139. package/rules/laws/bitcoin/BTC-00004-confirmations-and-reorgs.md +40 -0
  140. package/rules/laws/bitcoin/BTC-00005-address-gap-limit.md +37 -0
  141. package/rules/laws/bitcoin/BTC-00006-relay-is-policy-dependent.md +36 -0
  142. package/rules/laws/bitcoin/BTC-00007-dust-policy.md +36 -0
  143. package/rules/laws/bitcoin/BTC-00008-min-relay-fee.md +36 -0
  144. package/rules/laws/bitcoin/BTC-00009-feefilter.md +36 -0
  145. package/rules/laws/bitcoin/README.md +29 -0
  146. package/rules/laws/default.md +30 -0
  147. package/rules/laws/git/GIT-00001-atomic-commit.md +29 -0
  148. package/rules/laws/git/GIT-00002-imperative-subject.md +27 -0
  149. package/rules/laws/git/GIT-00003-formatting-50-72.md +28 -0
  150. package/rules/laws/git/GIT-00004-trunk-based.md +28 -0
  151. package/rules/laws/git/GIT-00005-public-immutability.md +26 -0
  152. package/rules/laws/git/GIT-00006-signing.md +27 -0
  153. package/rules/laws/git/GIT-00007-reviewer-capital.md +26 -0
  154. package/rules/laws/git/GIT-00008-patch-series.md +28 -0
  155. package/rules/laws/git/GIT-00009-branch-naming.md +28 -0
  156. package/rules/laws/git/GIT-00010-pr-hygiene.md +51 -0
  157. package/rules/laws/git/GIT-00011-merge-method.md +35 -0
  158. package/rules/laws/git/GIT-00012-conflict-resolution.md +35 -0
  159. package/rules/laws/git/GIT-00013-ignore-standards.md +38 -0
  160. package/rules/laws/git/GIT-00014-lfs-large-binaries.md +37 -0
  161. package/rules/laws/git/GIT-00015-git-hooks.md +35 -0
  162. package/rules/laws/git/GIT-00016-branch-protection.md +34 -0
  163. package/rules/laws/git/GIT-00017-secrets-management.md +34 -0
  164. package/rules/laws/git/GIT-00018-ci-enforcement.md +33 -0
  165. package/rules/laws/git/GIT-00019-review-checklist.md +39 -0
  166. package/rules/laws/git/GIT-00020-issue-references.md +34 -0
  167. package/rules/laws/git/GIT-00021-partial-staging.md +38 -0
  168. package/rules/laws/git/GIT-00022-feature-flags.md +33 -0
  169. package/rules/laws/git/GIT-00023-breaking-changes.md +41 -0
  170. package/rules/laws/git/GIT-00024-dependency-management.md +44 -0
  171. package/rules/laws/git/GIT-00025-large-repository-optimization.md +54 -0
  172. package/rules/laws/git/README.md +31 -0
  173. package/rules/laws/go/GO-00001-actor-model.md +51 -0
  174. package/rules/laws/go/GO-00002-api-design.md +37 -0
  175. package/rules/laws/go/GO-00003-error-handling.md +43 -0
  176. package/rules/laws/go/GO-00004-context.md +45 -0
  177. package/rules/laws/go/GO-00005-performance.md +40 -0
  178. package/rules/laws/go/GO-00006-packages.md +29 -0
  179. package/rules/laws/go/GO-00007-circuit-breakers.md +43 -0
  180. package/rules/laws/go/GO-00008-safety.md +39 -0
  181. package/rules/laws/go/GO-00009-table-driven-test.md +48 -0
  182. package/rules/laws/go/GO-00010-escape-analysis.md +37 -0
  183. package/rules/laws/go/GO-00011-retry.md +45 -0
  184. package/rules/laws/go/GO-00012-rate-limiting.md +42 -0
  185. package/rules/laws/go/GO-00013-io-buffering.md +43 -0
  186. package/rules/laws/go/GO-00014-memory-layout.md +41 -0
  187. package/rules/laws/go/GO-00015-aaa-pattern.md +49 -0
  188. package/rules/laws/go/GO-00016-test-libraries.md +35 -0
  189. package/rules/laws/go/GO-00017-comments.md +37 -0
  190. package/rules/laws/go/GO-00018-test-isolation.md +38 -0
  191. package/rules/laws/go/GO-00019-test-comments.md +36 -0
  192. package/rules/laws/go/GO-00020-mocking.md +36 -0
  193. package/rules/laws/go/GO-00021-configuration.md +36 -0
  194. package/rules/laws/go/GO-00022-observability.md +34 -0
  195. package/rules/laws/go/GO-00023-dependency-management.md +28 -0
  196. package/rules/laws/go/GO-00024-project-layout.md +30 -0
  197. package/rules/laws/go/GO-00025-concurrency-patterns.md +39 -0
  198. package/rules/laws/go/README.md +45 -0
  199. package/rules/laws/typescript/README.md +14 -0
  200. package/rules/laws/typescript/TS-00001-no-any.md +39 -0
  201. package/rules/laws/typescript/TS-00002-immutability.md +36 -0
  202. package/rules/laws/typescript/TS-00003-async.md +35 -0
  203. package/rules/laws/typescript/TS-00004-strict-null.md +38 -0
  204. package/rules/laws/typescript/TS-00005-unions.md +35 -0
  205. package/rules/laws/typescript/TS-00006-interface.md +38 -0
  206. package/rules/laws/typescript/TS-00007-generics.md +38 -0
  207. package/rules/laws/typescript/TS-00008-modules.md +28 -0
  208. package/rules/policies/README.md +12 -0
  209. package/rules/policies/default.md +28 -0
  210. package/scripts/README.md +45 -0
  211. package/scripts/generate_release_notes.py +376 -0
  212. package/scripts/validate_specs.py +730 -0
@@ -0,0 +1,730 @@
1
+ """Repository integrity validation for lgtm-specs.
2
+
3
+ This script validates the Knowledge Graph structure at a coarse level.
4
+
5
+ Design goals:
6
+ - Zero third-party dependencies.
7
+ - Deterministic output suitable for CI.
8
+
9
+ Policy:
10
+ - Validation is pass/fail.
11
+ - WARN findings fail validation unless baselined.
12
+
13
+ Notes:
14
+ - Errors are reserved for broken invariants (e.g., spec version mismatch).
15
+ - Most content shape checks are emitted as warnings; they still fail validation
16
+ unless ignored via `--baseline`.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ import re
23
+ import sys
24
+ import argparse
25
+ import urllib.error
26
+ import urllib.parse
27
+ import urllib.request
28
+ import time
29
+ from dataclasses import dataclass
30
+ from pathlib import Path
31
+
32
+
33
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
34
+
35
+
36
@dataclass(frozen=True)
class Finding:
    """A single validation finding.

    Instances are immutable (frozen dataclass).

    Attributes:
        level: Severity label, one of "ERROR" or "WARN".
        message: Human-readable message.
    """

    level: str
    message: str
47
+
48
+
49
def repo_rel(p: Path) -> str:
    """Render ``p`` relative to ``REPO_ROOT`` for printing.

    The result always uses forward slashes so that output is stable across
    platforms. ``Path.relative_to`` raises ``ValueError`` when ``p`` is not
    located under ``REPO_ROOT``.
    """

    relative = p.relative_to(REPO_ROOT)
    return relative.as_posix()
54
+
55
+
56
def walk_files(root: Path) -> list[Path]:
    """Recursively enumerate all files under ``root`` in a stable order.

    ``os.walk`` yields entries in whatever order the filesystem returns them,
    so directory and file names are sorted here to keep the resulting findings
    reproducible across machines and CI runs.

    Args:
        root: Directory to traverse.

    Returns:
        Paths of every file below ``root``: the files of a directory come
        first (sorted by name), followed by its subdirectories in sorted
        order. A ``root`` that does not exist yields an empty list.
    """

    out: list[Path] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting in place controls the order in which os.walk descends.
        dirnames.sort()
        base = Path(dirpath)
        out.extend(base / fn for fn in sorted(filenames))
    return out
67
+
68
+
69
def read_text(p: Path) -> str:
    """Return the full contents of ``p`` decoded as UTF-8."""

    with p.open("r", encoding="utf-8") as handle:
        return handle.read()
73
+
74
+
75
def is_markdown(p: Path) -> bool:
    """Report whether ``p`` has the ``.md`` extension (case-sensitive)."""

    extension = p.suffix
    return extension == ".md"
79
+
80
+
81
def is_readme(p: Path) -> bool:
    """Report whether the final path component is exactly ``README.md``."""

    file_name = p.name
    return file_name == "README.md"
85
+
86
+
87
def is_rule_file(p: Path) -> bool:
    """Decide whether ``p`` is an atomic rule file (constitution or law).

    Deliberately excluded:
    - Directory READMEs (indexes)
    - `rules/laws/default.md` (fallback rules, not an atomic law)
    """

    if is_readme(p) or not is_markdown(p):
        return False

    # repo_rel() already returns a forward-slash path.
    rel = repo_rel(p)
    if rel == "rules/laws/default.md":
        return False

    return rel.startswith(("rules/constitution/", "rules/laws/"))
107
+
108
+
109
def is_policy_file(p: Path) -> bool:
    """Decide whether ``p`` is a policy spec file under rules/policies/.

    READMEs and non-Markdown files never qualify.
    """

    if is_readme(p) or not is_markdown(p):
        return False

    # repo_rel() already returns a forward-slash path.
    location = repo_rel(p)
    return location.startswith("rules/policies/")
116
+
117
+
118
def is_rules_template_file(p: Path) -> bool:
    """Decide whether ``p`` is the rule template, rules/RULE-00000-EXAMPLE.md."""

    if is_readme(p) or not is_markdown(p):
        return False

    # repo_rel() already returns a forward-slash path.
    location = repo_rel(p)
    return location == "rules/RULE-00000-EXAMPLE.md"
124
+
125
+
126
def is_default_law_file(p: Path) -> bool:
    """Decide whether ``p`` is the non-atomic fallback law file, rules/laws/default.md."""

    if is_readme(p) or not is_markdown(p):
        return False

    # repo_rel() already returns a forward-slash path.
    location = repo_rel(p)
    return location == "rules/laws/default.md"
132
+
133
+
134
# Atomic rule filenames look like PREFIX-00000-slug.md: an uppercase prefix,
# a five-digit number, and a lowercase slug of letters, digits, and hyphens.
RULE_FILENAME_RE = re.compile(r"^[A-Z]+-\d{5}-[a-z0-9][a-z0-9-]*\.md$")

# Every atomic rule should include at least one high-level "dimension" tag.
DIMENSION_TAGS = {"#structural", "#behavioral", "#runtime", "#operational"}
138
+
139
+
140
def find_meta_line(text: str, key: str) -> str | None:
    """Return the first line that begins with ``**Key**:`` or ``**Key** :``.

    The marker must start at column 0 of the line. Returns ``None`` when no
    line matches.
    """

    markers = (f"**{key}**:", f"**{key}** :")
    matching = (candidate for candidate in text.splitlines() if candidate.startswith(markers))
    return next(matching, None)
149
+
150
+
151
# External URL checks are best-effort and may need temporary suppressions.
# Keep this empty by default (fork-friendly); use `--url-ignore` for project-specific ignores.
DEFAULT_IGNORED_URLS: set[str] = set()

# Common badge endpoints are often flaky/permission-dependent for HEAD checks.
# Each entry is a compiled regex; the only default matches GitHub Actions
# workflow badge SVG URLs (with an optional query string).
IGNORED_URL_PATTERNS = [
    re.compile(r"^https?://github\.com/.+/actions/workflows/.+/badge\.svg(\?.*)?$"),
]
159
+
160
+
161
def extract_first_link(line: str) -> str | None:
    """Return the first Markdown link target found in ``line``, or ``None``."""

    for target in extract_markdown_links(line):
        return target
    return None
166
+
167
+
168
def extract_markdown_links(text: str) -> list[str]:
    """Collect the targets of every ``[label](target)`` link in ``text``.

    This is a minimal scanner, not a full Markdown parser:
    - A ``[`` directly preceded by ``!`` (image syntax) is skipped.
    - Balanced nested parentheses inside the target are kept.
    - A target whose closing ``)`` is never found is dropped.

    Targets are returned raw, in order of appearance.
    """

    targets: list[str] = []
    size = len(text)
    pos = 0

    while (open_bracket := text.find("[", pos)) != -1:
        # Ignore image syntax: ![alt](url)
        if open_bracket > 0 and text[open_bracket - 1] == "!":
            pos = open_bracket + 1
            continue

        close_bracket = text.find("]", open_bracket + 1)
        if close_bracket == -1:
            break

        paren = close_bracket + 1
        if paren >= size or text[paren] != "(":
            pos = paren
            continue

        # Scan for the parenthesis that balances the opening one.
        start = paren + 1
        nesting = 1
        resume = size
        for idx in range(start, size):
            ch = text[idx]
            if ch == "(":
                nesting += 1
            elif ch == ")":
                nesting -= 1
                if nesting == 0:
                    targets.append(text[start:idx])
                    resume = idx + 1
                    break

        pos = resume

    return targets
210
+
211
+
212
def normalize_link_target(target: str) -> str:
    """Normalize a raw Markdown link target.

    Handles common forms such as:
    - [x](https://example.com)
    - [x](https://example.com "title")
    - [x](<https://example.com>)

    Surrounding whitespace and a wrapping ``<...>`` pair are removed. For
    http(s) targets, everything after the first whitespace (an optional title)
    is dropped; other targets are left intact.
    """

    cleaned = target.strip()

    wrapped_in_angles = cleaned.startswith("<") and cleaned.endswith(">")
    if wrapped_in_angles:
        cleaned = cleaned[1:-1].strip()

    # Example: https://example.com "title" -> https://example.com
    if cleaned.startswith(("http://", "https://")):
        cleaned = cleaned.split(None, 1)[0]

    return cleaned
231
+
232
+
233
def is_external_http_url(target: str) -> bool:
    """Report whether ``target`` is an external http(s) URL.

    Args:
        target: A normalized link target.

    Returns:
        True when the target parses with an ``http``/``https`` scheme and a
        non-empty network location. Targets that ``urlparse`` rejects (for
        example an unbalanced IPv6 bracket such as ``http://[::1``) are
        reported as not external instead of raising.
    """

    try:
        parsed = urllib.parse.urlparse(target)
    except ValueError:
        # A malformed link must not abort the whole validation run.
        return False
    return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
238
+
239
+
240
def probe_url_status(url: str, timeout: float) -> tuple[int | None, str | None]:
    """Probe ``url`` with HEAD first, falling back to GET when HEAD is unsupported.

    Each method is attempted up to three times (with 0.5s and 1.0s pauses)
    while the outcome looks transient: no status at all, or one of
    429/500/502/503/504. Malformed URLs fail deterministically and are not
    retried. GET is used only when HEAD answers 405 or 501.

    Args:
        url: The URL to probe.
        timeout: Per-request timeout in seconds.

    Returns:
        (status_code, error_text). ``status_code`` is ``None`` when no HTTP
        status could be obtained, in which case ``error_text`` describes why.
        This function does not raise for network or URL errors.
    """

    # Local import keeps this function self-contained.
    import http.client

    headers = {"User-Agent": "lgtm-specs-validator/1.0"}
    transient_statuses = {429, 500, 502, 503, 504}

    def do_request(method: str) -> tuple[int | None, str | None, bool]:
        """Issue one request; return (status, error_text, permanent_failure)."""
        req_headers = dict(headers)
        if method == "GET":
            # Minimize payload when falling back from HEAD.
            req_headers["Range"] = "bytes=0-0"

        try:
            # Request() itself raises ValueError for unusable URLs, so it
            # belongs inside the try block.
            req = urllib.request.Request(url, method=method, headers=req_headers)
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                return getattr(resp, "status", 200), None, False
        except urllib.error.HTTPError as e:
            return e.code, None, False
        except urllib.error.URLError as e:
            return None, str(e.reason), False
        except (ValueError, http.client.InvalidURL) as e:
            # Malformed URL (bad scheme, non-numeric port, non-ASCII
            # characters, ...): retrying cannot help.
            return None, f"invalid URL: {e}", True
        except (OSError, http.client.HTTPException) as e:
            # Timeouts or protocol errors that urllib does not wrap in
            # URLError (e.g. raised while reading the response).
            return None, str(e) or type(e).__name__, False

    def probe(method: str) -> tuple[int | None, str | None]:
        """Run ``method`` with up to two retries on transient outcomes."""
        status, err, permanent = do_request(method)
        for attempt in range(2):
            if permanent or not (status is None or status in transient_statuses):
                break
            time.sleep(0.5 * (attempt + 1))
            status, err, permanent = do_request(method)
        return status, err

    status, err = probe("HEAD")

    # Some endpoints don't support HEAD.
    if status in {405, 501}:
        status, err = probe("GET")

    return status, err
280
+
281
+
282
def validate_external_urls(findings: list[Finding], timeout: float, ignored_urls: set[str]) -> None:
    """Probe every external URL referenced from Markdown files in the repo.

    Notes:
    - Internal repo links are intentionally skipped.
    - External URL checks are network-dependent, so this should be opt-in.

    Each distinct URL is probed once, in sorted order. A WARN finding is
    appended for every URL whose probe does not return 2xx/3xx or 401/403.
    """

    skipped_prefixes = (".git/", "node_modules/", "__pycache__/")

    # Map each external URL to the repo-relative files that reference it.
    url_sources: dict[str, list[str]] = {}
    for p in walk_files(REPO_ROOT):
        if not is_markdown(p):
            continue

        rel = repo_rel(p)
        if rel.startswith(skipped_prefixes):
            continue

        for raw_target in extract_markdown_links(read_text(p)):
            target = normalize_link_target(raw_target)
            if is_external_http_url(target):
                url_sources.setdefault(target, []).append(rel)

    for url, sources in sorted(url_sources.items()):
        if url in ignored_urls or any(pattern.search(url) for pattern in IGNORED_URL_PATTERNS):
            continue

        status, err = probe_url_status(url, timeout)

        if status is not None:
            # 2xx/3xx are good.
            if 200 <= status < 400:
                continue
            # Many doc sites return auth/forbidden for bots but are still valid links.
            if status in {401, 403}:
                continue

        where = ", ".join(sorted(set(sources)))
        if status is None:
            message = f"External URL probe failed ({err}): {url} (found in {where})"
        elif status in {404, 410}:
            message = f"Broken external URL ({status}): {url} (found in {where})"
        else:
            message = f"External URL returned status {status}: {url} (found in {where})"
        findings.append(Finding("WARN", message))
349
+
350
+
351
def validate_spec_version(findings: list[Finding]) -> None:
    """Ensure VERSION matches integrate/README.md Spec Version.

    This intentionally enforces strict equality.
    Keep `VERSION` and `integrate/README.md` `**Spec Version**:` in sync.

    An ERROR finding is appended when either file is missing, when the
    `**Spec Version**:` line is absent, or when the two values differ.

    Args:
        findings: List that receives any findings produced.
    """

    version_file = REPO_ROOT / "VERSION"
    integrate_readme = REPO_ROOT / "integrate" / "README.md"

    if not version_file.exists():
        findings.append(Finding("ERROR", "Missing VERSION file"))
        return

    # Report a missing README as a finding instead of crashing on read.
    if not integrate_readme.exists():
        findings.append(Finding("ERROR", "Missing integrate/README.md"))
        return

    version = read_text(version_file).strip()
    integrate = read_text(integrate_readme)

    m = re.search(r"\*\*Spec Version\*\*:\s*([^\r\n]+)", integrate)
    if not m:
        findings.append(Finding("ERROR", "Missing **Spec Version** in integrate/README.md"))
        return

    spec_version = m.group(1).strip()
    if spec_version != version:
        findings.append(
            Finding(
                "ERROR",
                f"Spec version mismatch: VERSION={version} integrate/README.md={spec_version}",
            )
        )
381
+
382
+
383
def validate_rule_shape(p: Path, findings: list[Finding]) -> None:
    """Check the shape of a rule-like file, enforcing the atomic filename format.

    Convenience wrapper around ``validate_rule_shape_with_options`` with
    ``enforce_filename=True``. Legacy gaps surface as WARN-level findings.
    """

    validate_rule_shape_with_options(p=p, findings=findings, enforce_filename=True)
390
+
391
+
392
def validate_rule_shape_with_options(
    p: Path,
    findings: list[Finding],
    *,
    enforce_filename: bool,
) -> None:
    """Check the shape of a rule-like Markdown file; append WARN findings.

    Checks, in order: filename format (optional), H1 title, presence of
    Source/Tags/Related metadata, that internal Source/Related links resolve
    to files inside the repo, a dimension tag in Tags, the required sections,
    Bad/Good examples, and an anchor on internal ``docs/meta/`` Source links.
    A file without an H1 title gets no further checks.

    Args:
        p: Path of the file to validate.
        findings: List that receives any findings produced.
        enforce_filename: If true, enforce atomic rule filename format.
    """

    def warn(message: str) -> None:
        findings.append(Finding("WARN", message))

    if enforce_filename and RULE_FILENAME_RE.match(p.name) is None:
        warn(f"Bad filename (expected PREFIX-00000-slug.md): {repo_rel(p)}")

    text = read_text(p)
    if not text.startswith("# "):
        warn(f"Missing H1 title: {repo_rel(p)}")
        return

    for key in ("Source", "Tags", "Related"):
        if find_meta_line(text, key) is None:
            warn(f"Missing **{key}** metadata: {repo_rel(p)}")

    # Validate that metadata links resolve to real files (internal links only).
    for key in ("Source", "Related"):
        meta = find_meta_line(text, key)
        if not meta:
            continue

        for raw_target in extract_markdown_links(meta):
            target = normalize_link_target(raw_target)

            # External http(s) URLs and mailto links are out of scope here.
            if is_external_http_url(target) or target.startswith("mailto:"):
                continue

            # Strip anchor for path existence checks.
            path_part, _, _ = target.partition("#")
            if not path_part:
                # Anchor-only link (same file).
                continue

            # A leading "/" means repo-root relative; otherwise relative to the file.
            if path_part.startswith("/"):
                anchor_dir, relative = REPO_ROOT, path_part.lstrip("/")
            else:
                anchor_dir, relative = p.parent, path_part
            resolved = (anchor_dir / relative).resolve(strict=False)

            # Allow directory links by treating them as README.md.
            if resolved.is_dir():
                resolved = resolved / "README.md"

            # Guard against repo-escape paths.
            try:
                resolved.relative_to(REPO_ROOT)
            except ValueError:
                warn(f"Metadata link escapes repo root (**{key}**): {target} (in {repo_rel(p)})")
                continue

            if not resolved.exists():
                warn(f"Broken **{key}** link: {target} (in {repo_rel(p)})")

    tags_line = find_meta_line(text, "Tags")
    if tags_line is not None:
        tags = {tag.lower() for tag in re.findall(r"#[A-Za-z0-9_-]+", tags_line)}
        if tags.isdisjoint(DIMENSION_TAGS):
            warn(
                f"Missing dimension tag in **Tags** metadata (need one of: {', '.join(sorted(DIMENSION_TAGS))}): {repo_rel(p)}"
            )

    for hdr in ("## Definition", "## Requirements", "## Anti-Patterns", "## Examples"):
        if hdr not in text:
            warn(f"Missing section '{hdr}': {repo_rel(p)}")

    if "## Examples" in text:
        for marker in ("**Bad:**", "**Good:**"):
            if marker not in text:
                warn(f"Missing {marker} example: {repo_rel(p)}")

    src = find_meta_line(text, "Source")
    link = extract_first_link(src) if src else None
    # Internal meta docs should prefer anchors.
    if link and "docs/meta/" in link and "#" not in link:
        warn(f"Internal Source link missing #anchor: {repo_rel(p)}")
500
+
501
+
502
def validate_rule_indexing(p: Path, findings: list[Finding]) -> None:
    """Check that ``p`` is mentioned by the README.md in its own directory.

    A WARN finding is appended when the sibling README.md is missing, or when
    the file name of ``p`` does not occur anywhere in that README's text.
    """

    readme = p.parent / "README.md"

    if not readme.exists():
        findings.append(
            Finding("WARN", f"Missing directory index README.md: {repo_rel(p.parent)}")
        )
        return

    if p.name in read_text(readme):
        return

    findings.append(Finding("WARN", f"File not indexed in {repo_rel(readme)}: {p.name}"))
521
+
522
+
523
def parse_registry_capabilities() -> set[str]:
    """Parse capability keys from models/registry.yaml.

    This is a minimal YAML parser by convention:
    - Capabilities live under the top-level key: `capabilities:`
    - Each capability is a 2-space indented key.

    The section ends at the next top-level (unindented) line, so 2-space keys
    belonging to other top-level sections are not mistaken for capabilities.
    Blank lines and comment lines do not end the section.

    Returns:
        The set of capability names found under `capabilities:`.
    """

    p = REPO_ROOT / "models" / "registry.yaml"
    key_re = re.compile(r"^\s{2}([a-z][a-z0-9_-]*):\s*$")
    caps: set[str] = set()
    in_caps = False

    for line in read_text(p).splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        if not line[0].isspace():
            # Any top-level line either opens the capabilities section or closes it.
            in_caps = stripped == "capabilities:"
            continue

        if not in_caps:
            continue

        m = key_re.match(line)
        if m:
            caps.add(m.group(1))

    return caps
547
+
548
+
549
def parse_agent_capabilities() -> tuple[list[tuple[str, str]], list[str]]:
    """Collect the capabilities declared by role specs under agents/roles.

    Role specs declare `Capability:` inside a `<Meta>` block. Markdown files
    without a `<Meta>` block (e.g., shared fragments) and READMEs are ignored.

    Returns:
        A pair ``(declared, undeclared)``: ``declared`` holds
        ``(repo-relative file, capability)`` tuples; ``undeclared`` lists the
        repo-relative files whose `<Meta>` block has no `Capability:` line.
    """

    roles_dir = REPO_ROOT / "agents" / "roles"
    meta_re = re.compile(r"(?s)<Meta>\s*(.*?)\s*</Meta>")
    cap_re = re.compile(r"(?m)^\s*Capability:\s*([a-z][a-z0-9_-]*)\s*$")

    declared: list[tuple[str, str]] = []
    undeclared: list[str] = []

    candidates = (f for f in walk_files(roles_dir) if is_markdown(f) and not is_readme(f))
    for spec in candidates:
        meta = meta_re.search(read_text(spec))
        if meta is None:
            # Not a role spec (e.g., shared Markdown fragments).
            continue

        rel = repo_rel(spec)
        found = cap_re.search(meta.group(1))
        if found is None:
            undeclared.append(rel)
        else:
            declared.append((rel, found.group(1)))

    return declared, undeclared
581
+
582
+
583
def validate_agent_capabilities(findings: list[Finding]) -> None:
    """Check that every capability used by an agent role exists in the registry.

    ERROR findings are appended when no role spec with a `<Meta>` block is
    found at all, when a spec's `<Meta>` lacks a `Capability:` line, and when
    a declared capability is absent from the registry.
    """

    registry = parse_registry_capabilities()
    declared, undeclared = parse_agent_capabilities()

    if not (declared or undeclared):
        findings.append(
            Finding(
                "ERROR",
                "No agent capabilities found under agents/roles (Capability:<...> in <Meta>); parsing may be broken",
            )
        )

    findings.extend(
        Finding("ERROR", f"Agent spec missing Capability in <Meta>: {file_path}")
        for file_path in undeclared
    )
    findings.extend(
        Finding("ERROR", f"Agent capability not in registry: {file_path} -> {cap}")
        for file_path, cap in declared
        if cap not in registry
    )
605
+
606
+
607
def main() -> int:
    """Run validation and return a shell exit code.

    WARN findings fail validation unless baselined.

    Returns:
        0 when no errors and no un-baselined warnings remain, or after
        ``--write-baseline`` has written its file; 1 otherwise.
    """

    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        "--strict",
        action="store_true",
        help="DEPRECATED (no-op): validation is always strict; retained for compatibility.",
    )
    parser.add_argument(
        "--baseline",
        type=str,
        default=None,
        help="Path to a newline-delimited list of known warnings to ignore.",
    )
    parser.add_argument(
        "--write-baseline",
        type=str,
        default=None,
        help="Write current warning messages to the given file and exit 0.",
    )
    parser.add_argument(
        "--check-urls",
        action="store_true",
        help="Opt-in: validate external http(s) URLs found in Markdown files.",
    )
    parser.add_argument(
        "--url-timeout",
        type=float,
        default=8.0,
        help="Timeout in seconds for external URL checks (default: 8.0).",
    )
    parser.add_argument(
        "--url-ignore",
        action="append",
        default=[],
        help="External URL to ignore during --check-urls (repeatable).",
    )
    args = parser.parse_args()

    def display(path: Path) -> str:
        """Repo-relative path when possible, otherwise the path as given."""
        try:
            return repo_rel(path)
        except ValueError:
            # The path lies outside REPO_ROOT (e.g. an absolute --baseline path).
            return str(path)

    findings: list[Finding] = []

    # Spec versioning is a hard invariant.
    validate_spec_version(findings)

    # Validate rules (atomic rules + policies + templates).
    for p in walk_files(REPO_ROOT / "rules"):
        if is_rule_file(p):
            enforce_filename = True
        elif is_policy_file(p) or is_rules_template_file(p) or is_default_law_file(p):
            enforce_filename = False
        else:
            continue
        validate_rule_shape_with_options(p, findings, enforce_filename=enforce_filename)
        validate_rule_indexing(p, findings)

    # Ensure agents only use declared capabilities.
    validate_agent_capabilities(findings)

    if args.check_urls:
        ignored_urls = set(DEFAULT_IGNORED_URLS)
        ignored_urls.update(args.url_ignore)
        validate_external_urls(findings, timeout=args.url_timeout, ignored_urls=ignored_urls)

    if args.write_baseline:
        # Joining with an absolute path yields that absolute path unchanged.
        out_path = REPO_ROOT / args.write_baseline
        out_lines = sorted({f.message for f in findings if f.level == "WARN"})
        out_path.write_text("\n".join(out_lines) + "\n", encoding="utf-8")
        print(f"OK: wrote baseline to {display(out_path)}")
        return 0

    baseline: set[str] = set()
    if args.baseline:
        baseline_path = (
            (REPO_ROOT / args.baseline).resolve()
            if not os.path.isabs(args.baseline)
            else Path(args.baseline)
        )
        if not baseline_path.exists():
            findings.append(
                Finding(
                    "ERROR",
                    f"Baseline file does not exist: {display(baseline_path)}",
                )
            )
        else:
            # Blank lines and lines starting with "#" are ignored.
            baseline = {
                line.strip()
                for line in read_text(baseline_path).splitlines()
                if line.strip() and not line.strip().startswith("#")
            }

    errors = [f for f in findings if f.level == "ERROR"]
    warns = [f for f in findings if f.level == "WARN" and f.message not in baseline]

    if errors:
        print(f"FAILED: {len(errors)} error(s)", file=sys.stderr)
        for f in errors:
            print(f"- {f.message}", file=sys.stderr)

    if warns:
        print(f"WARN: {len(warns)} warning(s)", file=sys.stderr)
        for f in warns:
            print(f"- {f.message}", file=sys.stderr)

    if errors or warns:
        return 1

    print("OK: validation passed")
    return 0
727
+
728
+
729
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())