@svayam-opensource/prj 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,391 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Workspace repo validator.
4
+
5
+ Runs schema, registry, lifecycle, and cross-reference checks against the
6
+ repo's working-tree state. Used by scripts/test-merge.sh as the pre-merge
7
+ gate, and by CI on PRs to main/publish.
8
+
9
+ Usage:
10
+ python3 scripts/validate/run.py [REPO_ROOT]
11
+
12
+ Exits 0 on pass, 1 on any validation failure, 2 if PyYAML is not installed.
13
+
14
+ Notes:
15
+ Framework files (all *.md, *.mdc, *.yaml, *.yml, CODEOWNERS) are scanned for
16
+ leftover {{PLACEHOLDER}} tokens unconditionally. Direction A: framework
17
+ files never carry placeholders — org values live in org-config.yaml only.
18
+ A placeholder anywhere is a regression.
19
+ """
20
+
21
+ import os
22
+ import re
23
+ import sys
24
+ from pathlib import Path
25
+
26
+ sys.path.insert(0, str(Path(__file__).parent))
27
+ from check_knowledge import check_knowledge # noqa: E402
28
+ from check_secrets import check_secrets # noqa: E402
29
+ from check_protocol import check_protocol # noqa: E402
30
+
31
+ try:
32
+ import yaml
33
+ except ImportError:
34
+ print("[FAIL] PyYAML not installed. Run: bash scripts/install-deps.sh", file=sys.stderr)
35
+ sys.exit(2)
36
+
37
+
38
# Values a project.yaml `status` field may take.
ALLOWED_STATUSES = {"proposed", "active", "paused", "completed", "cancelled"}

# Values a project.yaml `knowledge_status` field may take. None is included
# so that an absent or null field is accepted.
ALLOWED_KNOWLEDGE_STATUSES = {
    None,
    "pending_review",
    "merged",
    "rejected",
    "under_revision",
    "abandoned",
}

# Keys every project.yaml must carry with a non-empty value.
REQUIRED_PROJECT_FIELDS = ["id", "slug", "status"]

# Keys org-config.yaml must declare.
REQUIRED_CONFIG_FIELDS = [
    "org_name",
    "org_short_name",
    "org_slug",
    "org_slug_lower",
    "org_repo_url",
    "github_org",
    "workspace_repo",
    "default_branch",
    "default_code_branch",
    "agent_work_root",
    "policy_owner_email",
    "policy_owner_github",
]

# A leftover template token such as {{ORG_NAME}}.
PLACEHOLDER_RE = re.compile(r"\{\{[A-Z_a-z0-9]+\}\}")

# Files are scanned for placeholders when their suffix or exact name is
# listed here.
PLACEHOLDER_SCAN_SUFFIXES = {".md", ".yaml", ".yml", ".mdc"}
PLACEHOLDER_SCAN_NAMES = {"CODEOWNERS"}
52
+
53
+
54
+ # ── Schema ──────────────────────────────────────────────────────────────────
55
+
56
def check_schema(repo_root: Path) -> list[str]:
    """Validate the structure of org-config.yaml, registry.yaml and project.yaml files.

    Args:
        repo_root: Root directory of the workspace repo.

    Returns:
        Human-readable error messages; empty when every check passed. A
        missing, unparseable or non-mapping org-config.yaml or registry.yaml
        ends the check early with the errors collected so far.
    """
    errors: list[str] = []

    config_path = repo_root / "org-config.yaml"
    if not config_path.exists():
        return [f"org-config.yaml not found at {config_path}"]
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding; a decode failure is reported like any
        # other parse failure instead of escaping as a traceback.
        config = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    except (yaml.YAMLError, UnicodeDecodeError) as e:
        return [f"org-config.yaml does not parse: {e}"]
    if not isinstance(config, dict):
        return [f"org-config.yaml: top-level must be a mapping, got {type(config).__name__}"]

    # Template state: org-config.yaml ships from TEMPLATE with all values
    # empty. After ./setup.sh runs, values are populated. The validator must
    # accept both: structure (keys present) is always required; populated
    # values are only required post-setup. Detect template state from org_name.
    is_template_state = not bool(config.get("org_name"))

    for field in REQUIRED_CONFIG_FIELDS:
        if field not in config:
            errors.append(f"org-config.yaml: missing required field '{field}'")
        elif not is_template_state and config[field] in (None, ""):
            errors.append(f"org-config.yaml: '{field}' is empty")

    registry_path = repo_root / "registry.yaml"
    if not registry_path.exists():
        errors.append("registry.yaml not found")
        return errors
    try:
        registry = yaml.safe_load(registry_path.read_text(encoding="utf-8"))
    except (yaml.YAMLError, UnicodeDecodeError) as e:
        errors.append(f"registry.yaml does not parse: {e}")
        return errors
    if not isinstance(registry, dict):
        errors.append("registry.yaml: top-level must be a mapping")
        return errors

    last_issued = registry.get("last_issued")
    # bool is a subclass of int, so `last_issued: true` has to be rejected
    # explicitly.
    if (
        not isinstance(last_issued, int)
        or isinstance(last_issued, bool)
        or last_issued < 0
    ):
        errors.append(
            f"registry.yaml: last_issued must be non-negative int, "
            f"got {last_issued!r}"
        )
    projects = registry.get("projects")
    if projects is not None and not isinstance(projects, list):
        errors.append(
            f"registry.yaml: 'projects' must be a list, got {type(projects).__name__}"
        )

    projects_dir = repo_root / "projects"
    if projects_dir.is_dir():
        for project_dir in sorted(projects_dir.iterdir()):
            if not project_dir.is_dir():
                continue
            pf = project_dir / "project.yaml"
            if not pf.exists():
                continue  # folder may exist for staging; covered by registry check
            rel = pf.relative_to(repo_root)
            try:
                p = yaml.safe_load(pf.read_text(encoding="utf-8"))
            except (yaml.YAMLError, UnicodeDecodeError) as e:
                errors.append(f"{rel}: does not parse: {e}")
                continue
            if not isinstance(p, dict):
                errors.append(f"{rel}: top-level must be a mapping")
                continue
            for field in REQUIRED_PROJECT_FIELDS:
                if field not in p or p[field] in (None, ""):
                    errors.append(f"{rel}: missing required field '{field}'")

            # Test the type before the set lookup: a YAML list or mapping is
            # unhashable and would raise TypeError in `not in <set>` instead
            # of being reported as an invalid value.
            status = p.get("status")
            if not isinstance(status, str) or status not in ALLOWED_STATUSES:
                errors.append(
                    f"{rel}: status '{status!r}' not in {sorted(ALLOWED_STATUSES)}"
                )
            ks = p.get("knowledge_status")
            if ks is not None and (
                not isinstance(ks, str) or ks not in ALLOWED_KNOWLEDGE_STATUSES
            ):
                errors.append(
                    f"{rel}: knowledge_status '{ks!r}' invalid"
                )

    return errors
136
+
137
+
138
+ # ── Registry consistency ────────────────────────────────────────────────────
139
+
140
def check_registry(repo_root: Path) -> list[str]:
    """Check registry.yaml for internal consistency and agreement with projects/.

    Verifies that every project id has the ``<PREFIX>-NNN-slug`` shape, that
    no NNN is used twice, that ``last_issued`` is not below the highest NNN,
    that every registered project has a folder under ``projects/``, and that
    every folder under ``projects/`` is registered.

    Args:
        repo_root: Root directory of the workspace repo.

    Returns:
        Human-readable error messages; empty when the registry is consistent.
    """
    errors: list[str] = []
    registry_path = repo_root / "registry.yaml"
    try:
        registry = yaml.safe_load(registry_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
        return [f"registry.yaml: {e}"]
    if not isinstance(registry, dict):
        return ["registry.yaml: top-level must be a mapping"]

    projects = registry.get("projects")
    if not isinstance(projects, list):
        # The schema check reports a wrongly typed 'projects'; here it is
        # treated as empty so a scalar value cannot raise TypeError when
        # iterated.
        projects = []
    last_issued = registry.get("last_issued", 0)

    # Accept any uppercase prefix for backwards compatibility with pre-v0.2.0
    # orgs whose projects use <ORG_SLUG>-NNN-slug. New projects (v0.2.0+)
    # use the literal PRJ- prefix.
    id_pattern = re.compile(r"^[A-Z]+-(\d+)-")

    nnn_seen: dict[int, str] = {}
    max_nnn = 0
    for entry in projects:
        if not isinstance(entry, dict):
            continue
        pid = entry.get("id") or ""
        # A non-string id (e.g. a bare number or a list) can never match and
        # must not be handed to the regex, which would raise TypeError.
        m = id_pattern.match(pid) if isinstance(pid, str) else None
        if not m:
            errors.append(f"registry.yaml: project entry has invalid id format: {pid!r} (expected <PREFIX>-NNN-slug)")
            continue
        nnn = int(m.group(1))
        if nnn in nnn_seen:
            errors.append(
                f"registry.yaml: duplicate NNN {nnn:03d} ({nnn_seen[nnn]} and {pid})"
            )
        nnn_seen[nnn] = pid
        max_nnn = max(max_nnn, nnn)

    if isinstance(last_issued, int) and last_issued < max_nnn:
        errors.append(
            f"registry.yaml: last_issued ({last_issued}) < max NNN in projects[] ({max_nnn})"
        )

    projects_dir = repo_root / "projects"
    # Only non-empty string ids can name a folder. Other values were already
    # reported above and would break the set construction (unhashable) or
    # the path join (non-string).
    listed_ids = [
        e.get("id")
        for e in projects
        if isinstance(e, dict) and isinstance(e.get("id"), str) and e.get("id")
    ]
    registered_ids = set(listed_ids)

    for pid in listed_ids:
        folder = projects_dir / pid
        if not folder.is_dir():
            errors.append(f"registry.yaml: project '{pid}' has no folder at projects/{pid}")

    if projects_dir.is_dir():
        for folder in sorted(projects_dir.iterdir()):
            if not folder.is_dir():
                continue
            if folder.name in {".gitkeep"}:
                continue
            if folder.name not in registered_ids:
                errors.append(
                    f"projects/{folder.name}: folder exists but no entry in registry.yaml"
                )

    return errors
204
+
205
+
206
+ # ── Lifecycle invariants ────────────────────────────────────────────────────
207
+
208
def check_lifecycle(repo_root: Path) -> list[str]:
    """Check that each project's lifecycle fields agree with its status.

    For every ``projects/<dir>/project.yaml`` that parses to a mapping:
    completed projects need ``completed_at``; cancelled projects need
    ``cancellation_reason`` and ``cancelled_at``; paused projects need
    ``paused_at``; active projects need ``started_at`` and must not have
    ``paused_at`` set. ``completed_at`` and ``cancelled_at`` may never both
    be set.

    Args:
        repo_root: Root directory of the workspace repo.

    Returns:
        Human-readable error messages; empty when no invariant is violated
        or when there is no ``projects`` directory.
    """
    problems: list[str] = []
    projects_root = repo_root / "projects"
    if not projects_root.is_dir():
        return problems

    manifests = (
        child / "project.yaml"
        for child in sorted(projects_root.iterdir())
        if child.is_dir()
    )
    for manifest in manifests:
        if not manifest.exists():
            continue
        rel = manifest.relative_to(repo_root)
        try:
            p = yaml.safe_load(manifest.read_text())
        except Exception:
            # Files that cannot be read or parsed are skipped here.
            continue
        if not isinstance(p, dict):
            continue

        status = p.get("status")

        if status == "completed" and not p.get("completed_at"):
            problems.append(f"{rel}: status=completed but completed_at is null")

        if status == "cancelled":
            if not p.get("cancellation_reason"):
                problems.append(f"{rel}: status=cancelled but cancellation_reason is null")
            if not p.get("cancelled_at"):
                problems.append(f"{rel}: status=cancelled but cancelled_at is null")

        if status == "paused" and not p.get("paused_at"):
            problems.append(f"{rel}: status=paused but paused_at is null")

        if status == "active":
            if not p.get("started_at"):
                problems.append(f"{rel}: status=active but started_at is null")
            if p.get("paused_at"):
                problems.append(f"{rel}: status=active but paused_at is set ({p['paused_at']})")

        # Independent of status: a project cannot be both finished and cancelled.
        if p.get("completed_at") and p.get("cancelled_at"):
            problems.append(f"{rel}: both completed_at and cancelled_at are set")

        # Tasks-on-board model: tasks are not tracked in project.yaml (they are
        # GitHub Issues + sub-branches), so there is no tasks[] to validate here.

    return problems
253
+
254
+
255
+ # ── Cross-references ────────────────────────────────────────────────────────
256
+
257
def check_cross_refs(repo_root: Path) -> list[str]:
    """Check CODEOWNERS paths and scan framework files for leftover placeholders.

    Two independent checks are performed:

    1. Every literal path in the top-level CODEOWNERS file must exist
       relative to ``repo_root``. Wildcard patterns are skipped because they
       do not name a single path.
    2. Every file whose suffix is in ``PLACEHOLDER_SCAN_SUFFIXES`` or whose
       name is in ``PLACEHOLDER_SCAN_NAMES`` must contain no
       ``{{PLACEHOLDER}}`` token. Anything inside ``.git`` and
       ``.github/workflows/`` is excluded.

    Args:
        repo_root: Root directory of the workspace repo.

    Returns:
        Human-readable error messages; empty when both checks pass.
    """
    errors: list[str] = []

    codeowners = repo_root / "CODEOWNERS"
    if codeowners.exists():
        for lineno, raw in enumerate(codeowners.read_text(encoding="utf-8").splitlines(), 1):
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            parts = line.split()
            if len(parts) < 2:
                continue
            path_pattern = parts[0]
            # Wildcard rules (the common `* @owner` default, `*.md`,
            # `docs/*`, ...) are patterns, not paths; an existence test on
            # them would always produce a false error.
            if "*" in path_pattern or "?" in path_pattern:
                continue
            check_path = path_pattern.lstrip("/").rstrip("/")
            if not check_path:
                continue
            target = repo_root / check_path
            if not target.exists():
                errors.append(f"CODEOWNERS:{lineno}: path '{path_pattern}' does not exist")

    # Framework files must NEVER contain {{PLACEHOLDER}} tokens — org values
    # are read from org-config.yaml at runtime. .github/workflows/ files are
    # excluded because they use GitHub Actions ${{ expr }} syntax legitimately.
    for f in repo_root.rglob("*"):
        if not f.is_file():
            continue
        rel = f.relative_to(repo_root)
        rel_parts = rel.parts
        # Skip git's own metadata only. A prefix test on ".git" would also
        # drop everything under .github/, leaving files such as
        # .github/CODEOWNERS or issue templates unscanned.
        if ".git" in rel_parts:
            continue
        if rel_parts[:2] == (".github", "workflows"):
            continue
        if f.suffix not in PLACEHOLDER_SCAN_SUFFIXES and f.name not in PLACEHOLDER_SCAN_NAMES:
            continue
        try:
            text = f.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: a file that cannot be read as UTF-8 text is
            # skipped rather than failing the whole scan.
            continue
        for m in PLACEHOLDER_RE.finditer(text):
            errors.append(
                f"{rel}: leftover placeholder {m.group(0)}"
            )

    return errors
300
+
301
+
302
+ # ── Executable bits ─────────────────────────────────────────────────────────
303
+
304
# These scripts must be recorded in the git index with mode 100755. A file
# committed as 100644 fails with "permission denied" when an adopter runs
# ./scripts/X, and nothing flags the problem until that moment.
EXPECTED_EXEC_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"^prj$",
        r"^setup\.sh$",
        r"^scripts/.+\.sh$",
        r"^scripts/validate/.+\.py$",
        r"^tests/.+\.sh$",
    )
)
314
+
315
+
316
def check_exec_bits(repo_root: Path) -> list[str]:
    """Report tracked scripts whose git index mode is not 100755.

    Runs ``git ls-files -s`` in ``repo_root`` and inspects every listed path
    that matches one of ``EXPECTED_EXEC_PATTERNS``.

    Args:
        repo_root: Root directory of the workspace repo.

    Returns:
        One error message per offending path. Empty when everything is
        executable, and also when git cannot be run or the command fails
        (for instance outside a git repository).
    """
    import subprocess

    try:
        listing = subprocess.run(
            ["git", "-C", str(repo_root), "ls-files", "-s"],
            capture_output=True, text=True, check=True,
        ).stdout
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Not a git repo, or git unavailable — nothing to check.
        return []

    problems: list[str] = []
    for record in listing.splitlines():
        # Each record looks like: <mode> <hash> <stage>\t<path>
        meta, tab, path = record.partition("\t")
        if not tab:
            continue
        fields = meta.split()
        if not fields:
            continue
        mode = fields[0]
        if mode == "100755":
            continue
        if not any(p.match(path) for p in EXPECTED_EXEC_PATTERNS):
            continue
        problems.append(
            f"{path}: committed mode is {mode}, expected 100755 "
            f"(run: chmod +x {path} && git update-index --chmod=+x {path})"
        )
    return problems
345
+
346
+
347
+ # ── Runner ──────────────────────────────────────────────────────────────────
348
+
349
# (label, check function) pairs. main() runs them in this order, passing the
# repo root to each, and prints the label beside the PASS/FAIL result.
CHECKS = [
    ("schema", check_schema),
    ("registry", check_registry),
    ("lifecycle", check_lifecycle),
    ("cross-refs", check_cross_refs),
    ("exec-bits", check_exec_bits),
    ("knowledge-org", check_knowledge),
    ("secrets", check_secrets),
    ("protocol", check_protocol),
]
359
+
360
+
361
def main() -> int:
    """Run every validator in CHECKS against the repo and print a report.

    The repo root is taken from the first command-line argument and defaults
    to the current directory.

    Returns:
        0 when all validators pass; 1 when any validator reports an error or
        when the directory has no registry.yaml.
    """
    target = sys.argv[1] if len(sys.argv) > 1 else "."
    repo_root = Path(target).resolve()

    if not (repo_root / "registry.yaml").exists():
        print(
            f"[FAIL] {repo_root} does not look like a workspace repo (no registry.yaml)",
            file=sys.stderr,
        )
        return 1

    total_errors = 0
    for name, check in CHECKS:
        errors = check(repo_root)
        if not errors:
            print(f"[PASS] {name}")
            continue
        print(f"[FAIL] {name} ({len(errors)} error{'s' if len(errors) != 1 else ''}):")
        for e in errors:
            print(f" - {e}")
        total_errors += len(errors)

    print()
    if not total_errors:
        print("=== all validators passed ===")
        return 0
    print(f"=== {total_errors} validation error(s) ===")
    return 1
388
+
389
+
390
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())